From 9851c8285f7bf70a6cb4bede2ee94110c14acc19 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 18 Feb 2016 10:37:59 -0800 Subject: Move the intel vulkan driver to src/intel/vulkan --- configure.ac | 4 +- src/Makefile.am | 1 - src/intel/Makefile.am | 2 +- src/intel/vulkan/.gitignore | 9 + src/intel/vulkan/Makefile.am | 203 +++ src/intel/vulkan/anv_allocator.c | 862 +++++++++ src/intel/vulkan/anv_batch_chain.c | 1077 +++++++++++ src/intel/vulkan/anv_cmd_buffer.c | 1191 +++++++++++++ src/intel/vulkan/anv_descriptor_set.c | 532 ++++++ src/intel/vulkan/anv_device.c | 1789 +++++++++++++++++++ src/intel/vulkan/anv_dump.c | 209 +++ src/intel/vulkan/anv_entrypoints_gen.py | 324 ++++ src/intel/vulkan/anv_formats.c | 603 +++++++ src/intel/vulkan/anv_gem.c | 358 ++++ src/intel/vulkan/anv_gem_stubs.c | 159 ++ src/intel/vulkan/anv_gen_macros.h | 146 ++ src/intel/vulkan/anv_image.c | 911 ++++++++++ src/intel/vulkan/anv_intel.c | 100 ++ src/intel/vulkan/anv_meta.c | 169 ++ src/intel/vulkan/anv_meta.h | 75 + src/intel/vulkan/anv_meta_blit.c | 1442 +++++++++++++++ src/intel/vulkan/anv_meta_clear.c | 1098 ++++++++++++ src/intel/vulkan/anv_meta_resolve.c | 867 +++++++++ src/intel/vulkan/anv_nir.h | 44 + src/intel/vulkan/anv_nir_apply_dynamic_offsets.c | 171 ++ src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 394 ++++ src/intel/vulkan/anv_nir_lower_push_constants.c | 77 + src/intel/vulkan/anv_pass.c | 160 ++ src/intel/vulkan/anv_pipeline.c | 1278 +++++++++++++ src/intel/vulkan/anv_pipeline_cache.c | 405 +++++ src/intel/vulkan/anv_private.h | 1876 ++++++++++++++++++++ src/intel/vulkan/anv_query.c | 187 ++ src/intel/vulkan/anv_util.c | 195 ++ src/intel/vulkan/anv_wsi.c | 196 ++ src/intel/vulkan/anv_wsi.h | 74 + src/intel/vulkan/anv_wsi_wayland.c | 871 +++++++++ src/intel/vulkan/anv_wsi_x11.c | 758 ++++++++ src/intel/vulkan/dev_icd.json.in | 7 + src/intel/vulkan/gen7_cmd_buffer.c | 589 ++++++ src/intel/vulkan/gen7_pipeline.c | 410 +++++ src/intel/vulkan/gen7_state.c | 264 +++ src/intel/vulkan/gen8_cmd_buffer.c | 914 ++++++++++ src/intel/vulkan/gen8_pipeline.c | 573 ++++++ src/intel/vulkan/gen8_state.c | 493 +++++ src/intel/vulkan/genX_cmd_buffer.c | 717 ++++++++ src/intel/vulkan/genX_pipeline.c | 126 ++ src/intel/vulkan/genX_pipeline_util.h | 327 ++++ src/intel/vulkan/genX_state_util.h | 112 ++ src/intel/vulkan/intel_icd.json.in | 7 + src/intel/vulkan/tests/.gitignore | 5 + src/intel/vulkan/tests/Makefile.am | 46 + src/intel/vulkan/tests/block_pool_no_free.c | 144 ++ src/intel/vulkan/tests/state_pool.c | 57 + src/intel/vulkan/tests/state_pool_free_list_only.c | 66 + src/intel/vulkan/tests/state_pool_no_free.c | 117 ++ src/intel/vulkan/tests/state_pool_test_helper.h | 71 + src/vulkan/.gitignore | 9 - src/vulkan/Makefile.am | 203 --- src/vulkan/anv_allocator.c | 862 --------- src/vulkan/anv_batch_chain.c | 1077 ----------- src/vulkan/anv_cmd_buffer.c | 1191 ------------- src/vulkan/anv_descriptor_set.c | 532 ------ src/vulkan/anv_device.c | 1789 ------------------- src/vulkan/anv_dump.c | 209 --- src/vulkan/anv_entrypoints_gen.py | 324 ---- src/vulkan/anv_formats.c | 603 ------- src/vulkan/anv_gem.c | 358 ---- src/vulkan/anv_gem_stubs.c | 159 -- src/vulkan/anv_gen_macros.h | 146 -- src/vulkan/anv_image.c | 911 ---------- src/vulkan/anv_intel.c | 100 -- src/vulkan/anv_meta.c | 169 -- src/vulkan/anv_meta.h | 75 - src/vulkan/anv_meta_blit.c | 1442 --------------- src/vulkan/anv_meta_clear.c | 1098 ------------ src/vulkan/anv_meta_resolve.c | 867 --------- src/vulkan/anv_nir.h | 44 - 
src/vulkan/anv_nir_apply_dynamic_offsets.c | 171 -- src/vulkan/anv_nir_apply_pipeline_layout.c | 394 ---- src/vulkan/anv_nir_lower_push_constants.c | 77 - src/vulkan/anv_pass.c | 160 -- src/vulkan/anv_pipeline.c | 1278 ------------- src/vulkan/anv_pipeline_cache.c | 405 ----- src/vulkan/anv_private.h | 1876 -------------------- src/vulkan/anv_query.c | 187 -- src/vulkan/anv_util.c | 195 -- src/vulkan/anv_wsi.c | 196 -- src/vulkan/anv_wsi.h | 74 - src/vulkan/anv_wsi_wayland.c | 871 --------- src/vulkan/anv_wsi_x11.c | 758 -------- src/vulkan/dev_icd.json.in | 7 - src/vulkan/gen7_cmd_buffer.c | 589 ------ src/vulkan/gen7_pipeline.c | 410 ----- src/vulkan/gen7_state.c | 264 --- src/vulkan/gen8_cmd_buffer.c | 914 ---------- src/vulkan/gen8_pipeline.c | 573 ------ src/vulkan/gen8_state.c | 493 ----- src/vulkan/genX_cmd_buffer.c | 717 -------- src/vulkan/genX_pipeline.c | 126 -- src/vulkan/genX_pipeline_util.h | 327 ---- src/vulkan/genX_state_util.h | 112 -- src/vulkan/intel_icd.json.in | 7 - src/vulkan/tests/.gitignore | 5 - src/vulkan/tests/Makefile.am | 46 - src/vulkan/tests/block_pool_no_free.c | 144 -- src/vulkan/tests/state_pool.c | 57 - src/vulkan/tests/state_pool_free_list_only.c | 66 - src/vulkan/tests/state_pool_no_free.c | 117 -- src/vulkan/tests/state_pool_test_helper.h | 71 - 109 files changed, 23858 insertions(+), 23859 deletions(-) create mode 100644 src/intel/vulkan/.gitignore create mode 100644 src/intel/vulkan/Makefile.am create mode 100644 src/intel/vulkan/anv_allocator.c create mode 100644 src/intel/vulkan/anv_batch_chain.c create mode 100644 src/intel/vulkan/anv_cmd_buffer.c create mode 100644 src/intel/vulkan/anv_descriptor_set.c create mode 100644 src/intel/vulkan/anv_device.c create mode 100644 src/intel/vulkan/anv_dump.c create mode 100644 src/intel/vulkan/anv_entrypoints_gen.py create mode 100644 src/intel/vulkan/anv_formats.c create mode 100644 src/intel/vulkan/anv_gem.c create mode 100644 src/intel/vulkan/anv_gem_stubs.c create mode 100644 src/intel/vulkan/anv_gen_macros.h create mode 100644 src/intel/vulkan/anv_image.c create mode 100644 src/intel/vulkan/anv_intel.c create mode 100644 src/intel/vulkan/anv_meta.c create mode 100644 src/intel/vulkan/anv_meta.h create mode 100644 src/intel/vulkan/anv_meta_blit.c create mode 100644 src/intel/vulkan/anv_meta_clear.c create mode 100644 src/intel/vulkan/anv_meta_resolve.c create mode 100644 src/intel/vulkan/anv_nir.h create mode 100644 src/intel/vulkan/anv_nir_apply_dynamic_offsets.c create mode 100644 src/intel/vulkan/anv_nir_apply_pipeline_layout.c create mode 100644 src/intel/vulkan/anv_nir_lower_push_constants.c create mode 100644 src/intel/vulkan/anv_pass.c create mode 100644 src/intel/vulkan/anv_pipeline.c create mode 100644 src/intel/vulkan/anv_pipeline_cache.c create mode 100644 src/intel/vulkan/anv_private.h create mode 100644 src/intel/vulkan/anv_query.c create mode 100644 src/intel/vulkan/anv_util.c create mode 100644 src/intel/vulkan/anv_wsi.c create mode 100644 src/intel/vulkan/anv_wsi.h create mode 100644 src/intel/vulkan/anv_wsi_wayland.c create mode 100644 src/intel/vulkan/anv_wsi_x11.c create mode 100644 src/intel/vulkan/dev_icd.json.in create mode 100644 src/intel/vulkan/gen7_cmd_buffer.c create mode 100644 src/intel/vulkan/gen7_pipeline.c create mode 100644 src/intel/vulkan/gen7_state.c create mode 100644 src/intel/vulkan/gen8_cmd_buffer.c create mode 100644 src/intel/vulkan/gen8_pipeline.c create mode 100644 src/intel/vulkan/gen8_state.c create mode 100644 src/intel/vulkan/genX_cmd_buffer.c create mode 
100644 src/intel/vulkan/genX_pipeline.c create mode 100644 src/intel/vulkan/genX_pipeline_util.h create mode 100644 src/intel/vulkan/genX_state_util.h create mode 100644 src/intel/vulkan/intel_icd.json.in create mode 100644 src/intel/vulkan/tests/.gitignore create mode 100644 src/intel/vulkan/tests/Makefile.am create mode 100644 src/intel/vulkan/tests/block_pool_no_free.c create mode 100644 src/intel/vulkan/tests/state_pool.c create mode 100644 src/intel/vulkan/tests/state_pool_free_list_only.c create mode 100644 src/intel/vulkan/tests/state_pool_no_free.c create mode 100644 src/intel/vulkan/tests/state_pool_test_helper.h delete mode 100644 src/vulkan/.gitignore delete mode 100644 src/vulkan/Makefile.am delete mode 100644 src/vulkan/anv_allocator.c delete mode 100644 src/vulkan/anv_batch_chain.c delete mode 100644 src/vulkan/anv_cmd_buffer.c delete mode 100644 src/vulkan/anv_descriptor_set.c delete mode 100644 src/vulkan/anv_device.c delete mode 100644 src/vulkan/anv_dump.c delete mode 100644 src/vulkan/anv_entrypoints_gen.py delete mode 100644 src/vulkan/anv_formats.c delete mode 100644 src/vulkan/anv_gem.c delete mode 100644 src/vulkan/anv_gem_stubs.c delete mode 100644 src/vulkan/anv_gen_macros.h delete mode 100644 src/vulkan/anv_image.c delete mode 100644 src/vulkan/anv_intel.c delete mode 100644 src/vulkan/anv_meta.c delete mode 100644 src/vulkan/anv_meta.h delete mode 100644 src/vulkan/anv_meta_blit.c delete mode 100644 src/vulkan/anv_meta_clear.c delete mode 100644 src/vulkan/anv_meta_resolve.c delete mode 100644 src/vulkan/anv_nir.h delete mode 100644 src/vulkan/anv_nir_apply_dynamic_offsets.c delete mode 100644 src/vulkan/anv_nir_apply_pipeline_layout.c delete mode 100644 src/vulkan/anv_nir_lower_push_constants.c delete mode 100644 src/vulkan/anv_pass.c delete mode 100644 src/vulkan/anv_pipeline.c delete mode 100644 src/vulkan/anv_pipeline_cache.c delete mode 100644 src/vulkan/anv_private.h delete mode 100644 src/vulkan/anv_query.c delete mode 100644 src/vulkan/anv_util.c delete mode 100644 src/vulkan/anv_wsi.c delete mode 100644 src/vulkan/anv_wsi.h delete mode 100644 src/vulkan/anv_wsi_wayland.c delete mode 100644 src/vulkan/anv_wsi_x11.c delete mode 100644 src/vulkan/dev_icd.json.in delete mode 100644 src/vulkan/gen7_cmd_buffer.c delete mode 100644 src/vulkan/gen7_pipeline.c delete mode 100644 src/vulkan/gen7_state.c delete mode 100644 src/vulkan/gen8_cmd_buffer.c delete mode 100644 src/vulkan/gen8_pipeline.c delete mode 100644 src/vulkan/gen8_state.c delete mode 100644 src/vulkan/genX_cmd_buffer.c delete mode 100644 src/vulkan/genX_pipeline.c delete mode 100644 src/vulkan/genX_pipeline_util.h delete mode 100644 src/vulkan/genX_state_util.h delete mode 100644 src/vulkan/intel_icd.json.in delete mode 100644 src/vulkan/tests/.gitignore delete mode 100644 src/vulkan/tests/Makefile.am delete mode 100644 src/vulkan/tests/block_pool_no_free.c delete mode 100644 src/vulkan/tests/state_pool.c delete mode 100644 src/vulkan/tests/state_pool_free_list_only.c delete mode 100644 src/vulkan/tests/state_pool_no_free.c delete mode 100644 src/vulkan/tests/state_pool_test_helper.h diff --git a/configure.ac b/configure.ac index 604ea37..b4e2539 100644 --- a/configure.ac +++ b/configure.ac @@ -2521,6 +2521,8 @@ AC_CONFIG_FILES([Makefile src/intel/Makefile src/intel/genxml/Makefile src/intel/isl/Makefile + src/intel/vulkan/Makefile + src/intel/vulkan/tests/Makefile src/loader/Makefile src/mapi/Makefile src/mapi/es1api/glesv1_cm.pc @@ -2542,8 +2544,6 @@ AC_CONFIG_FILES([Makefile 
src/mesa/drivers/osmesa/osmesa.pc src/mesa/drivers/x11/Makefile src/mesa/main/tests/Makefile - src/vulkan/Makefile - src/vulkan/tests/Makefile src/util/Makefile src/util/tests/hash_table/Makefile]) diff --git a/src/Makefile.am b/src/Makefile.am index 02b8371..73686a9 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -58,7 +58,6 @@ AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS) if HAVE_VULKAN SUBDIRS += intel -SUBDIRS += vulkan endif AM_CPPFLAGS = \ diff --git a/src/intel/Makefile.am b/src/intel/Makefile.am index 520602d..d5bd0b3 100644 --- a/src/intel/Makefile.am +++ b/src/intel/Makefile.am @@ -19,4 +19,4 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. -SUBDIRS = genxml isl +SUBDIRS = genxml isl vulkan diff --git a/src/intel/vulkan/.gitignore b/src/intel/vulkan/.gitignore new file mode 100644 index 0000000..40afc2e --- /dev/null +++ b/src/intel/vulkan/.gitignore @@ -0,0 +1,9 @@ +# Generated source files +/*_spirv_autogen.h +/anv_entrypoints.c +/anv_entrypoints.h +/wayland-drm-protocol.c +/wayland-drm-client-protocol.h +/dev_icd.json +/intel_icd.json +/gen*_pack.h \ No newline at end of file diff --git a/src/intel/vulkan/Makefile.am b/src/intel/vulkan/Makefile.am new file mode 100644 index 0000000..2144e5a --- /dev/null +++ b/src/intel/vulkan/Makefile.am @@ -0,0 +1,203 @@ +# Copyright © 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +SUBDIRS = . 
tests + +vulkan_includedir = $(includedir)/vulkan + +vulkan_include_HEADERS = \ + $(top_srcdir)/include/vulkan/vk_platform.h \ + $(top_srcdir)/include/vulkan/vulkan.h \ + $(top_srcdir)/include/vulkan/vulkan_intel.h + +# Used when generating entrypoints to filter out unwanted extensions +VULKAN_ENTRYPOINT_CPPFLAGS = \ + -I$(top_srcdir)/include/vulkan \ + -DVK_USE_PLATFORM_XCB_KHR \ + -DVK_USE_PLATFORM_WAYLAND_KHR + +lib_LTLIBRARIES = libvulkan_intel.la + +check_LTLIBRARIES = libvulkan-test.la + +PER_GEN_LIBS = \ + libanv-gen7.la \ + libanv-gen75.la \ + libanv-gen8.la \ + libanv-gen9.la + +noinst_LTLIBRARIES = $(PER_GEN_LIBS) + +# The gallium includes are for the util/u_math.h include from main/macros.h + +AM_CPPFLAGS = \ + $(INTEL_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $(DEFINES) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/compiler \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa \ + -I$(top_srcdir)/src/mesa/drivers/dri/common \ + -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/intel/ \ + -I$(top_builddir)/src \ + -I$(top_builddir)/src/compiler \ + -I$(top_builddir)/src/compiler/nir \ + -I$(top_builddir)/src/vulkan + +libvulkan_intel_la_CFLAGS = $(CFLAGS) -Wno-override-init + +VULKAN_SOURCES = \ + anv_allocator.c \ + anv_cmd_buffer.c \ + anv_batch_chain.c \ + anv_descriptor_set.c \ + anv_device.c \ + anv_dump.c \ + anv_entrypoints.c \ + anv_entrypoints.h \ + anv_formats.c \ + anv_image.c \ + anv_intel.c \ + anv_meta.c \ + anv_meta_blit.c \ + anv_meta_clear.c \ + anv_meta_resolve.c \ + anv_nir_apply_dynamic_offsets.c \ + anv_nir_apply_pipeline_layout.c \ + anv_nir_lower_push_constants.c \ + anv_pass.c \ + anv_pipeline.c \ + anv_pipeline_cache.c \ + anv_private.h \ + anv_query.c \ + anv_util.c \ + anv_wsi.c \ + anv_wsi_x11.c + +BUILT_SOURCES = \ + anv_entrypoints.h \ + anv_entrypoints.c + +libanv_gen7_la_SOURCES = \ + genX_cmd_buffer.c \ + genX_pipeline.c \ + gen7_cmd_buffer.c \ + gen7_pipeline.c \ + gen7_state.c +libanv_gen7_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=70 + +libanv_gen75_la_SOURCES = \ + genX_cmd_buffer.c \ + genX_pipeline.c \ + gen7_cmd_buffer.c \ + gen7_pipeline.c \ + gen7_state.c +libanv_gen75_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=75 + +libanv_gen8_la_SOURCES = \ + genX_cmd_buffer.c \ + genX_pipeline.c \ + gen8_cmd_buffer.c \ + gen8_pipeline.c \ + gen8_state.c +libanv_gen8_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=80 + +libanv_gen9_la_SOURCES = \ + genX_cmd_buffer.c \ + genX_pipeline.c \ + gen8_cmd_buffer.c \ + gen8_pipeline.c \ + gen8_state.c +libanv_gen9_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=90 + +if HAVE_EGL_PLATFORM_WAYLAND +BUILT_SOURCES += \ + wayland-drm-protocol.c \ + wayland-drm-client-protocol.h + +%-protocol.c : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml + $(AM_V_GEN)$(WAYLAND_SCANNER) code < $< > $@ + +%-client-protocol.h : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml + $(AM_V_GEN)$(WAYLAND_SCANNER) client-header < $< > $@ + +AM_CPPFLAGS += -I$(top_srcdir)/src/egl/wayland/wayland-drm +VULKAN_SOURCES += \ + wayland-drm-protocol.c \ + anv_wsi_wayland.c +libvulkan_intel_la_CFLAGS += -DHAVE_WAYLAND_PLATFORM +endif + +libvulkan_intel_la_SOURCES = \ + $(VULKAN_SOURCES) \ + anv_gem.c + +anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS) + $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< header > $@ + 
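
The per-gen libraries above compile the same genX_*.c sources once per hardware generation, each time with a different -DANV_GENx10 value, and a token-pasting macro gives each compilation its own symbol names. The snippet below is only a hedged illustration of that pattern; the macro names are hypothetical and are not the actual contents of anv_gen_macros.h.

#ifndef ANV_GENx10
#define ANV_GENx10 75               /* normally supplied by the Makefile */
#endif

#define ANV_PASTE2(a, b) a##b
#define ANV_PASTE(a, b) ANV_PASTE2(a, b)

/* Hypothetical helper: expands shared code into a per-gen symbol. */
#define genX(name) ANV_PASTE(ANV_PASTE(gen, ANV_GENx10), ANV_PASTE(_, name))

/* Built with -DANV_GENx10=75 this defines gen75_emit_example(); compiling the
 * very same file with -DANV_GENx10=80 defines gen80_emit_example() instead. */
void genX(emit_example)(void)
{
}
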
+anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) + $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< code > $@ + +CLEANFILES = $(BUILT_SOURCES) + +libvulkan_intel_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ + $(top_builddir)/src/intel/isl/libisl.la \ + $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \ + $(top_builddir)/src/mesa/libmesa.la \ + $(top_builddir)/src/mesa/drivers/dri/common/libdri_test_stubs.la \ + -lpthread -ldl -lstdc++ \ + $(PER_GEN_LIBS) + +libvulkan_intel_la_LDFLAGS = \ + -module -avoid-version -shared -shrext .so + + +# Generate icd files. It would be nice to just be able to add these to +# AC_CONFIG_FILES, but @libdir@ typically expands to '${exec_prefix}/lib64', +# which we can't put in the icd file. When running sed from the Makefile we +# can use ${libdir}, which expands completely and we avoid putting Makefile +# variables in the icd file. + +icdconfdir=$(sysconfdir)/vulkan/icd.d +icdconf_DATA = intel_icd.json +noinst_DATA = dev_icd.json + +%.json : %.json.in + $(AM_V_GEN) $(SED) \ + -e "s#@build_libdir@#${abs_top_builddir}/${LIB_DIR}#" \ + -e "s#@install_libdir@#${libdir}#" < $< > $@ + + +# Libvulkan with dummy gem. Used for unit tests. + +libvulkan_test_la_SOURCES = \ + $(VULKAN_SOURCES) \ + anv_gem_stubs.c + +libvulkan_test_la_CFLAGS = $(libvulkan_la_CFLAGS) +libvulkan_test_la_LIBADD = $(libvulkan_la_LIBADD) + +include $(top_srcdir)/install-lib-links.mk diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c new file mode 100644 index 0000000..a7ae975 --- /dev/null +++ b/src/intel/vulkan/anv_allocator.c @@ -0,0 +1,862 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#define _DEFAULT_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#ifdef HAVE_VALGRIND +#define VG_NOACCESS_READ(__ptr) ({ \ + VALGRIND_MAKE_MEM_DEFINED((__ptr), sizeof(*(__ptr))); \ + __typeof(*(__ptr)) __val = *(__ptr); \ + VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr)));\ + __val; \ +}) +#define VG_NOACCESS_WRITE(__ptr, __val) ({ \ + VALGRIND_MAKE_MEM_UNDEFINED((__ptr), sizeof(*(__ptr))); \ + *(__ptr) = (__val); \ + VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr))); \ +}) +#else +#define VG_NOACCESS_READ(__ptr) (*(__ptr)) +#define VG_NOACCESS_WRITE(__ptr, __val) (*(__ptr) = (__val)) +#endif + +/* Design goals: + * + * - Lock free (except when resizing underlying bos) + * + * - Constant time allocation with typically only one atomic + * + * - Multiple allocation sizes without fragmentation + * + * - Can grow while keeping addresses and offset of contents stable + * + * - All allocations within one bo so we can point one of the + * STATE_BASE_ADDRESS pointers at it. + * + * The overall design is a two-level allocator: top level is a fixed size, big + * block (8k) allocator, which operates out of a bo. Allocation is done by + * either pulling a block from the free list or growing the used range of the + * bo. Growing the range may run out of space in the bo which we then need to + * grow. Growing the bo is tricky in a multi-threaded, lockless environment: + * we need to keep all pointers and contents in the old map valid. GEM bos in + * general can't grow, but we use a trick: we create a memfd and use ftruncate + * to grow it as necessary. We mmap the new size and then create a gem bo for + * it using the new gem userptr ioctl. Without heavy-handed locking around + * our allocation fast-path, there isn't really a way to munmap the old mmap, + * so we just keep it around until garbage collection time. While the block + * allocator is lockless for normal operations, we block other threads trying + * to allocate while we're growing the map. It sholdn't happen often, and + * growing is fast anyway. + * + * At the next level we can use various sub-allocators. The state pool is a + * pool of smaller, fixed size objects, which operates much like the block + * pool. It uses a free list for freeing objects, but when it runs out of + * space it just allocates a new block from the block pool. This allocator is + * intended for longer lived state objects such as SURFACE_STATE and most + * other persistent state objects in the API. We may need to track more info + * with these object and a pointer back to the CPU object (eg VkImage). In + * those cases we just allocate a slightly bigger object and put the extra + * state after the GPU state object. + * + * The state stream allocator works similar to how the i965 DRI driver streams + * all its state. Even with Vulkan, we need to emit transient state (whether + * surface state base or dynamic state base), and for that we can just get a + * block and fill it up. These cases are local to a command buffer and the + * sub-allocator need not be thread safe. The streaming allocator gets a new + * block when it runs out of space and chains them together so they can be + * easily freed. + */ + +/* Allocations are always at least 64 byte aligned, so 1 is an invalid value. + * We use it to indicate the free list is empty. 
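
To make the growth trick described above concrete, here is a small, self-contained sketch (hypothetical names, error handling omitted, not driver code) of growing a memfd-backed mapping while older mappings, and every pointer into them, stay valid:

#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

static int my_memfd_create(const char *name, unsigned int flags)
{
   return syscall(SYS_memfd_create, name, flags);
}

int main(void)
{
   int fd = my_memfd_create("pool", 0);
   ftruncate(fd, 1u << 30);        /* reserve 1 GiB of file; no pages yet */

   /* Map a small window and put something in it. */
   size_t size = 4096;
   char *map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
   strcpy(map, "hello");

   /* "Grow" by mapping a larger window of the same file. The old mapping is
    * left in place (leaked until teardown), so pointers into it stay valid. */
   char *bigger = mmap(NULL, size * 2, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

   return strcmp(map, bigger);     /* both views see the same bytes: 0 */
}
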
*/ +#define EMPTY 1 + +struct anv_mmap_cleanup { + void *map; + size_t size; + uint32_t gem_handle; +}; + +#define ANV_MMAP_CLEANUP_INIT ((struct anv_mmap_cleanup){0}) + +static inline long +sys_futex(void *addr1, int op, int val1, + struct timespec *timeout, void *addr2, int val3) +{ + return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3); +} + +static inline int +futex_wake(uint32_t *addr, int count) +{ + return sys_futex(addr, FUTEX_WAKE, count, NULL, NULL, 0); +} + +static inline int +futex_wait(uint32_t *addr, int32_t value) +{ + return sys_futex(addr, FUTEX_WAIT, value, NULL, NULL, 0); +} + +static inline int +memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +static inline uint32_t +ilog2_round_up(uint32_t value) +{ + assert(value != 0); + return 32 - __builtin_clz(value - 1); +} + +static inline uint32_t +round_to_power_of_two(uint32_t value) +{ + return 1 << ilog2_round_up(value); +} + +static bool +anv_free_list_pop(union anv_free_list *list, void **map, int32_t *offset) +{ + union anv_free_list current, new, old; + + current.u64 = list->u64; + while (current.offset != EMPTY) { + /* We have to add a memory barrier here so that the list head (and + * offset) gets read before we read the map pointer. This way we + * know that the map pointer is valid for the given offset at the + * point where we read it. + */ + __sync_synchronize(); + + int32_t *next_ptr = *map + current.offset; + new.offset = VG_NOACCESS_READ(next_ptr); + new.count = current.count + 1; + old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); + if (old.u64 == current.u64) { + *offset = current.offset; + return true; + } + current = old; + } + + return false; +} + +static void +anv_free_list_push(union anv_free_list *list, void *map, int32_t offset) +{ + union anv_free_list current, old, new; + int32_t *next_ptr = map + offset; + + old = *list; + do { + current = old; + VG_NOACCESS_WRITE(next_ptr, current.offset); + new.offset = offset; + new.count = current.count + 1; + old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); + } while (old.u64 != current.u64); +} + +/* All pointers in the ptr_free_list are assumed to be page-aligned. This + * means that the bottom 12 bits should all be zero. 
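
The free-list pop above packs a 32-bit offset and a 32-bit generation counter into one 64-bit word so that a single compare-and-swap updates both. The counter is what defeats ABA: if another thread pops and re-pushes the same offset between our read and our CAS, the counter differs and the CAS fails. A stripped-down, single-threaded illustration follows; the union layout is an assumption mirroring anv_private.h, not a copy of it.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EMPTY 1

/* Assumed shape of the list head (the real definition lives in anv_private.h). */
union free_list {
   struct {
      int32_t  offset;   /* offset of the first free element, or EMPTY */
      uint32_t count;    /* generation counter, bumped on every update */
   };
   uint64_t u64;
};

static bool
list_pop(union free_list *list, void *map, int32_t *out)
{
   union free_list cur = *list, new, old;
   while (cur.offset != EMPTY) {
      /* The first 4 bytes of a free element hold the offset of the next one. */
      int32_t *next = (int32_t *)((char *)map + cur.offset);
      new.offset = *next;
      new.count = cur.count + 1;
      old.u64 = __sync_val_compare_and_swap(&list->u64, cur.u64, new.u64);
      if (old.u64 == cur.u64) {   /* nobody raced us; we own cur.offset */
         *out = cur.offset;
         return true;
      }
      cur = old;                  /* lost the race; retry with the new head */
   }
   return false;
}

int main(void)
{
   char pool[256] = { 0 };
   /* Build a two-element list: head -> 64 -> 128 -> EMPTY. */
   *(int32_t *)(pool + 64) = 128;
   *(int32_t *)(pool + 128) = EMPTY;

   union free_list head;
   head.offset = 64;
   head.count = 0;

   int32_t off;
   while (list_pop(&head, pool, &off))
      printf("popped offset %d\n", off);   /* prints 64, then 128 */
   return 0;
}
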
+ */ +#define PFL_COUNT(x) ((uintptr_t)(x) & 0xfff) +#define PFL_PTR(x) ((void *)((uintptr_t)(x) & ~0xfff)) +#define PFL_PACK(ptr, count) ({ \ + assert(((uintptr_t)(ptr) & 0xfff) == 0); \ + (void *)((uintptr_t)(ptr) | (uintptr_t)((count) & 0xfff)); \ +}) + +static bool +anv_ptr_free_list_pop(void **list, void **elem) +{ + void *current = *list; + while (PFL_PTR(current) != NULL) { + void **next_ptr = PFL_PTR(current); + void *new_ptr = VG_NOACCESS_READ(next_ptr); + unsigned new_count = PFL_COUNT(current) + 1; + void *new = PFL_PACK(new_ptr, new_count); + void *old = __sync_val_compare_and_swap(list, current, new); + if (old == current) { + *elem = PFL_PTR(current); + return true; + } + current = old; + } + + return false; +} + +static void +anv_ptr_free_list_push(void **list, void *elem) +{ + void *old, *current; + void **next_ptr = elem; + + old = *list; + do { + current = old; + VG_NOACCESS_WRITE(next_ptr, PFL_PTR(current)); + unsigned new_count = PFL_COUNT(current) + 1; + void *new = PFL_PACK(elem, new_count); + old = __sync_val_compare_and_swap(list, current, new); + } while (old != current); +} + +static uint32_t +anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state); + +void +anv_block_pool_init(struct anv_block_pool *pool, + struct anv_device *device, uint32_t block_size) +{ + assert(util_is_power_of_two(block_size)); + + pool->device = device; + pool->bo.gem_handle = 0; + pool->bo.offset = 0; + pool->bo.size = 0; + pool->block_size = block_size; + pool->free_list = ANV_FREE_LIST_EMPTY; + pool->back_free_list = ANV_FREE_LIST_EMPTY; + + pool->fd = memfd_create("block pool", MFD_CLOEXEC); + if (pool->fd == -1) + return; + + /* Just make it 2GB up-front. The Linux kernel won't actually back it + * with pages until we either map and fault on one of them or we use + * userptr and send a chunk of it off to the GPU. + */ + if (ftruncate(pool->fd, BLOCK_POOL_MEMFD_SIZE) == -1) + return; + + anv_vector_init(&pool->mmap_cleanups, + round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128); + + pool->state.next = 0; + pool->state.end = 0; + pool->back_state.next = 0; + pool->back_state.end = 0; + + /* Immediately grow the pool so we'll have a backing bo. */ + pool->state.end = anv_block_pool_grow(pool, &pool->state); +} + +void +anv_block_pool_finish(struct anv_block_pool *pool) +{ + struct anv_mmap_cleanup *cleanup; + + anv_vector_foreach(cleanup, &pool->mmap_cleanups) { + if (cleanup->map) + munmap(cleanup->map, cleanup->size); + if (cleanup->gem_handle) + anv_gem_close(pool->device, cleanup->gem_handle); + } + + anv_vector_finish(&pool->mmap_cleanups); + + close(pool->fd); +} + +#define PAGE_SIZE 4096 + +/** Grows and re-centers the block pool. + * + * We grow the block pool in one or both directions in such a way that the + * following conditions are met: + * + * 1) The size of the entire pool is always a power of two. + * + * 2) The pool only grows on both ends. Neither end can get + * shortened. + * + * 3) At the end of the allocation, we have about twice as much space + * allocated for each end as we have used. This way the pool doesn't + * grow too far in one direction or the other. + * + * 4) If the _alloc_back() has never been called, then the back portion of + * the pool retains a size of zero. (This makes it easier for users of + * the block pool that only want a one-sided pool.) + * + * 5) We have enough space allocated for at least one more block in + * whichever side `state` points to. 
+ * + * 6) The center of the pool is always aligned to both the block_size of + * the pool and a 4K CPU page. + */ +static uint32_t +anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state) +{ + size_t size; + void *map; + uint32_t gem_handle; + struct anv_mmap_cleanup *cleanup; + + pthread_mutex_lock(&pool->device->mutex); + + assert(state == &pool->state || state == &pool->back_state); + + /* Gather a little usage information on the pool. Since we may have + * threadsd waiting in queue to get some storage while we resize, it's + * actually possible that total_used will be larger than old_size. In + * particular, block_pool_alloc() increments state->next prior to + * calling block_pool_grow, so this ensures that we get enough space for + * which ever side tries to grow the pool. + * + * We align to a page size because it makes it easier to do our + * calculations later in such a way that we state page-aigned. + */ + uint32_t back_used = align_u32(pool->back_state.next, PAGE_SIZE); + uint32_t front_used = align_u32(pool->state.next, PAGE_SIZE); + uint32_t total_used = front_used + back_used; + + assert(state == &pool->state || back_used > 0); + + size_t old_size = pool->bo.size; + + if (old_size != 0 && + back_used * 2 <= pool->center_bo_offset && + front_used * 2 <= (old_size - pool->center_bo_offset)) { + /* If we're in this case then this isn't the firsta allocation and we + * already have enough space on both sides to hold double what we + * have allocated. There's nothing for us to do. + */ + goto done; + } + + if (old_size == 0) { + /* This is the first allocation */ + size = MAX2(32 * pool->block_size, PAGE_SIZE); + } else { + size = old_size * 2; + } + + /* We can't have a block pool bigger than 1GB because we use signed + * 32-bit offsets in the free list and we don't want overflow. We + * should never need a block pool bigger than 1GB anyway. + */ + assert(size <= (1u << 31)); + + /* We compute a new center_bo_offset such that, when we double the size + * of the pool, we maintain the ratio of how much is used by each side. + * This way things should remain more-or-less balanced. + */ + uint32_t center_bo_offset; + if (back_used == 0) { + /* If we're in this case then we have never called alloc_back(). In + * this case, we want keep the offset at 0 to make things as simple + * as possible for users that don't care about back allocations. + */ + center_bo_offset = 0; + } else { + /* Try to "center" the allocation based on how much is currently in + * use on each side of the center line. 
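
As a worked example of the re-centering rule used in the code that follows (numbers invented for illustration): suppose the old pool is 256 KiB with 24 KiB used on the back and 104 KiB used on the front after page alignment. Doubling gives a 512 KiB pool, the proportional center is 512 KiB * 24 / 128 = 96 KiB, and that value is then aligned down to a multiple of the block size and page size and clamped so neither end shrinks. A sketch of just that arithmetic:

#include <stdint.h>
#include <stdio.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
   /* Illustrative numbers, not taken from a real trace. */
   const uint32_t block_size = 8 * 1024;
   const uint32_t page_size  = 4096;
   const uint32_t back_used  = 24 * 1024;    /* page-aligned usage, back end  */
   const uint32_t front_used = 104 * 1024;   /* page-aligned usage, front end */
   const uint32_t total_used = back_used + front_used;

   const size_t old_size = 256 * 1024;
   const size_t size     = old_size * 2;     /* pool sizes stay powers of two */

   /* Split the doubled pool in proportion to current usage... */
   uint32_t center = ((uint64_t)size * back_used) / total_used;

   /* ...then align down to a multiple of both the block size and page size. */
   uint32_t granularity = MAX2(block_size, page_size);
   center &= ~(granularity - 1);

   printf("new size %zu, center_bo_offset %u\n", size, center);  /* 524288, 98304 */
   return 0;
}
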
+ */ + center_bo_offset = ((uint64_t)size * back_used) / total_used; + + /* Align down to a multiple of both the block size and page size */ + uint32_t granularity = MAX2(pool->block_size, PAGE_SIZE); + assert(util_is_power_of_two(granularity)); + center_bo_offset &= ~(granularity - 1); + + assert(center_bo_offset >= back_used); + + /* Make sure we don't shrink the back end of the pool */ + if (center_bo_offset < pool->back_state.end) + center_bo_offset = pool->back_state.end; + + /* Make sure that we don't shrink the front end of the pool */ + if (size - center_bo_offset < pool->state.end) + center_bo_offset = size - pool->state.end; + } + + assert(center_bo_offset % pool->block_size == 0); + assert(center_bo_offset % PAGE_SIZE == 0); + + /* Assert that we only ever grow the pool */ + assert(center_bo_offset >= pool->back_state.end); + assert(size - center_bo_offset >= pool->state.end); + + cleanup = anv_vector_add(&pool->mmap_cleanups); + if (!cleanup) + goto fail; + *cleanup = ANV_MMAP_CLEANUP_INIT; + + /* Just leak the old map until we destroy the pool. We can't munmap it + * without races or imposing locking on the block allocate fast path. On + * the whole the leaked maps adds up to less than the size of the + * current map. MAP_POPULATE seems like the right thing to do, but we + * should try to get some numbers. + */ + map = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, pool->fd, + BLOCK_POOL_MEMFD_CENTER - center_bo_offset); + cleanup->map = map; + cleanup->size = size; + + if (map == MAP_FAILED) + goto fail; + + gem_handle = anv_gem_userptr(pool->device, map, size); + if (gem_handle == 0) + goto fail; + cleanup->gem_handle = gem_handle; + +#if 0 + /* Regular objects are created I915_CACHING_CACHED on LLC platforms and + * I915_CACHING_NONE on non-LLC platforms. However, userptr objects are + * always created as I915_CACHING_CACHED, which on non-LLC means + * snooped. That can be useful but comes with a bit of overheard. Since + * we're eplicitly clflushing and don't want the overhead we need to turn + * it off. */ + if (!pool->device->info.has_llc) { + anv_gem_set_caching(pool->device, gem_handle, I915_CACHING_NONE); + anv_gem_set_domain(pool->device, gem_handle, + I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); + } +#endif + + /* Now that we successfull allocated everything, we can write the new + * values back into pool. */ + pool->map = map + center_bo_offset; + pool->center_bo_offset = center_bo_offset; + pool->bo.gem_handle = gem_handle; + pool->bo.size = size; + pool->bo.map = map; + pool->bo.index = 0; + +done: + pthread_mutex_unlock(&pool->device->mutex); + + /* Return the appropreate new size. This function never actually + * updates state->next. Instead, we let the caller do that because it + * needs to do so in order to maintain its concurrency model. + */ + if (state == &pool->state) { + return pool->bo.size - pool->center_bo_offset; + } else { + assert(pool->center_bo_offset > 0); + return pool->center_bo_offset; + } + +fail: + pthread_mutex_unlock(&pool->device->mutex); + + return 0; +} + +static uint32_t +anv_block_pool_alloc_new(struct anv_block_pool *pool, + struct anv_block_state *pool_state) +{ + struct anv_block_state state, old, new; + + while (1) { + state.u64 = __sync_fetch_and_add(&pool_state->u64, pool->block_size); + if (state.next < state.end) { + assert(pool->map); + return state.next; + } else if (state.next == state.end) { + /* We allocated the first block outside the pool, we have to grow it. 
+ * pool_state->next acts a mutex: threads who try to allocate now will + * get block indexes above the current limit and hit futex_wait + * below. */ + new.next = state.next + pool->block_size; + new.end = anv_block_pool_grow(pool, pool_state); + assert(new.end >= new.next && new.end % pool->block_size == 0); + old.u64 = __sync_lock_test_and_set(&pool_state->u64, new.u64); + if (old.next != state.next) + futex_wake(&pool_state->end, INT_MAX); + return state.next; + } else { + futex_wait(&pool_state->end, state.end); + continue; + } + } +} + +int32_t +anv_block_pool_alloc(struct anv_block_pool *pool) +{ + int32_t offset; + + /* Try free list first. */ + if (anv_free_list_pop(&pool->free_list, &pool->map, &offset)) { + assert(offset >= 0); + assert(pool->map); + return offset; + } + + return anv_block_pool_alloc_new(pool, &pool->state); +} + +/* Allocates a block out of the back of the block pool. + * + * This will allocated a block earlier than the "start" of the block pool. + * The offsets returned from this function will be negative but will still + * be correct relative to the block pool's map pointer. + * + * If you ever use anv_block_pool_alloc_back, then you will have to do + * gymnastics with the block pool's BO when doing relocations. + */ +int32_t +anv_block_pool_alloc_back(struct anv_block_pool *pool) +{ + int32_t offset; + + /* Try free list first. */ + if (anv_free_list_pop(&pool->back_free_list, &pool->map, &offset)) { + assert(offset < 0); + assert(pool->map); + return offset; + } + + offset = anv_block_pool_alloc_new(pool, &pool->back_state); + + /* The offset we get out of anv_block_pool_alloc_new() is actually the + * number of bytes downwards from the middle to the end of the block. + * We need to turn it into a (negative) offset from the middle to the + * start of the block. + */ + assert(offset >= 0); + return -(offset + pool->block_size); +} + +void +anv_block_pool_free(struct anv_block_pool *pool, int32_t offset) +{ + if (offset < 0) { + anv_free_list_push(&pool->back_free_list, pool->map, offset); + } else { + anv_free_list_push(&pool->free_list, pool->map, offset); + } +} + +static void +anv_fixed_size_state_pool_init(struct anv_fixed_size_state_pool *pool, + size_t state_size) +{ + /* At least a cache line and must divide the block size. */ + assert(state_size >= 64 && util_is_power_of_two(state_size)); + + pool->state_size = state_size; + pool->free_list = ANV_FREE_LIST_EMPTY; + pool->block.next = 0; + pool->block.end = 0; +} + +static uint32_t +anv_fixed_size_state_pool_alloc(struct anv_fixed_size_state_pool *pool, + struct anv_block_pool *block_pool) +{ + int32_t offset; + struct anv_block_state block, old, new; + + /* Try free list first. 
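
anv_block_pool_alloc_back() above hands out negative offsets: pool->map points at the center of the memfd mapping, so a single signed offset addresses both halves and an element's CPU address is always map + offset. A toy, single-threaded, non-driver illustration of that addressing scheme:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy two-sided bump allocator; only the addressing idea is modeled. */
struct two_sided_pool {
   char    *map;          /* points at the center of the backing storage */
   uint32_t block_size;
   int32_t  front_next;   /* next front offset (grows upward)   */
   int32_t  back_next;    /* next back offset (grows downward)  */
};

static int32_t pool_alloc_front(struct two_sided_pool *p)
{
   int32_t offset = p->front_next;
   p->front_next += p->block_size;
   return offset;                      /* 0, block_size, 2*block_size, ... */
}

static int32_t pool_alloc_back(struct two_sided_pool *p)
{
   p->back_next -= p->block_size;
   return p->back_next;                /* -block_size, -2*block_size, ... */
}

int main(void)
{
   const uint32_t block_size = 4096;
   char *storage = calloc(16, block_size);
   struct two_sided_pool pool = {
      .map = storage + 8 * block_size,  /* center of the storage */
      .block_size = block_size,
   };

   int32_t front = pool_alloc_front(&pool);
   int32_t back  = pool_alloc_back(&pool);
   char *front_ptr = pool.map + front;  /* at or above the center */
   char *back_ptr  = pool.map + back;   /* below the center */
   assert(back < 0 && front >= 0 && back_ptr < front_ptr);
   printf("front %d, back %d\n", front, back);
   free(storage);
   return 0;
}
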
*/ + if (anv_free_list_pop(&pool->free_list, &block_pool->map, &offset)) { + assert(offset >= 0); + return offset; + } + + /* If free list was empty (or somebody raced us and took the items) we + * allocate a new item from the end of the block */ + restart: + block.u64 = __sync_fetch_and_add(&pool->block.u64, pool->state_size); + + if (block.next < block.end) { + return block.next; + } else if (block.next == block.end) { + offset = anv_block_pool_alloc(block_pool); + new.next = offset + pool->state_size; + new.end = offset + block_pool->block_size; + old.u64 = __sync_lock_test_and_set(&pool->block.u64, new.u64); + if (old.next != block.next) + futex_wake(&pool->block.end, INT_MAX); + return offset; + } else { + futex_wait(&pool->block.end, block.end); + goto restart; + } +} + +static void +anv_fixed_size_state_pool_free(struct anv_fixed_size_state_pool *pool, + struct anv_block_pool *block_pool, + uint32_t offset) +{ + anv_free_list_push(&pool->free_list, block_pool->map, offset); +} + +void +anv_state_pool_init(struct anv_state_pool *pool, + struct anv_block_pool *block_pool) +{ + pool->block_pool = block_pool; + for (unsigned i = 0; i < ANV_STATE_BUCKETS; i++) { + size_t size = 1 << (ANV_MIN_STATE_SIZE_LOG2 + i); + anv_fixed_size_state_pool_init(&pool->buckets[i], size); + } + VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false)); +} + +void +anv_state_pool_finish(struct anv_state_pool *pool) +{ + VG(VALGRIND_DESTROY_MEMPOOL(pool)); +} + +struct anv_state +anv_state_pool_alloc(struct anv_state_pool *pool, size_t size, size_t align) +{ + unsigned size_log2 = ilog2_round_up(size < align ? align : size); + assert(size_log2 <= ANV_MAX_STATE_SIZE_LOG2); + if (size_log2 < ANV_MIN_STATE_SIZE_LOG2) + size_log2 = ANV_MIN_STATE_SIZE_LOG2; + unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; + + struct anv_state state; + state.alloc_size = 1 << size_log2; + state.offset = anv_fixed_size_state_pool_alloc(&pool->buckets[bucket], + pool->block_pool); + state.map = pool->block_pool->map + state.offset; + VG(VALGRIND_MEMPOOL_ALLOC(pool, state.map, size)); + return state; +} + +void +anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state) +{ + assert(util_is_power_of_two(state.alloc_size)); + unsigned size_log2 = ilog2_round_up(state.alloc_size); + assert(size_log2 >= ANV_MIN_STATE_SIZE_LOG2 && + size_log2 <= ANV_MAX_STATE_SIZE_LOG2); + unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; + + VG(VALGRIND_MEMPOOL_FREE(pool, state.map)); + anv_fixed_size_state_pool_free(&pool->buckets[bucket], + pool->block_pool, state.offset); +} + +#define NULL_BLOCK 1 +struct anv_state_stream_block { + /* The next block */ + struct anv_state_stream_block *next; + + /* The offset into the block pool at which this block starts */ + uint32_t offset; + +#ifdef HAVE_VALGRIND + /* A pointer to the first user-allocated thing in this block. This is + * what valgrind sees as the start of the block. + */ + void *_vg_ptr; +#endif +}; + +/* The state stream allocator is a one-shot, single threaded allocator for + * variable sized blocks. We use it for allocating dynamic state. + */ +void +anv_state_stream_init(struct anv_state_stream *stream, + struct anv_block_pool *block_pool) +{ + stream->block_pool = block_pool; + stream->block = NULL; + + /* Ensure that next + whatever > end. This way the first call to + * state_stream_alloc fetches a new block. 
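
The state pool above rounds every request up to a power of two between 1 << ANV_MIN_STATE_SIZE_LOG2 and 1 << ANV_MAX_STATE_SIZE_LOG2 and serves it from the matching fixed-size bucket. A small sketch of that bucket math; the limits below are picked arbitrarily for illustration, the driver's real values live in anv_private.h:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative limits only. */
#define MIN_STATE_SIZE_LOG2 6    /* smallest bucket: 64 bytes   */
#define MAX_STATE_SIZE_LOG2 10   /* largest bucket: 1024 bytes  */

static uint32_t ilog2_round_up(uint32_t value)
{
   assert(value != 0);
   return 32 - __builtin_clz(value - 1);
}

static unsigned bucket_for(uint32_t size, uint32_t align)
{
   unsigned size_log2 = ilog2_round_up(size < align ? align : size);
   if (size_log2 < MIN_STATE_SIZE_LOG2)
      size_log2 = MIN_STATE_SIZE_LOG2;
   assert(size_log2 <= MAX_STATE_SIZE_LOG2);
   return size_log2 - MIN_STATE_SIZE_LOG2;
}

int main(void)
{
   /* A 56-byte object with 64-byte alignment lands in bucket 0 (64 bytes);
    * a 200-byte object lands in bucket 2 (256 bytes). */
   printf("%u %u\n", bucket_for(56, 64), bucket_for(200, 1));
   return 0;
}
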
+ */ + stream->next = 1; + stream->end = 0; + + VG(VALGRIND_CREATE_MEMPOOL(stream, 0, false)); +} + +void +anv_state_stream_finish(struct anv_state_stream *stream) +{ + VG(const uint32_t block_size = stream->block_pool->block_size); + + struct anv_state_stream_block *next = stream->block; + while (next != NULL) { + VG(VALGRIND_MAKE_MEM_DEFINED(next, sizeof(*next))); + struct anv_state_stream_block sb = VG_NOACCESS_READ(next); + VG(VALGRIND_MEMPOOL_FREE(stream, sb._vg_ptr)); + VG(VALGRIND_MAKE_MEM_UNDEFINED(next, block_size)); + anv_block_pool_free(stream->block_pool, sb.offset); + next = sb.next; + } + + VG(VALGRIND_DESTROY_MEMPOOL(stream)); +} + +struct anv_state +anv_state_stream_alloc(struct anv_state_stream *stream, + uint32_t size, uint32_t alignment) +{ + struct anv_state_stream_block *sb = stream->block; + + struct anv_state state; + + state.offset = align_u32(stream->next, alignment); + if (state.offset + size > stream->end) { + uint32_t block = anv_block_pool_alloc(stream->block_pool); + sb = stream->block_pool->map + block; + + VG(VALGRIND_MAKE_MEM_UNDEFINED(sb, sizeof(*sb))); + sb->next = stream->block; + sb->offset = block; + VG(sb->_vg_ptr = NULL); + VG(VALGRIND_MAKE_MEM_NOACCESS(sb, stream->block_pool->block_size)); + + stream->block = sb; + stream->start = block; + stream->next = block + sizeof(*sb); + stream->end = block + stream->block_pool->block_size; + + state.offset = align_u32(stream->next, alignment); + assert(state.offset + size <= stream->end); + } + + assert(state.offset > stream->start); + state.map = (void *)sb + (state.offset - stream->start); + state.alloc_size = size; + +#ifdef HAVE_VALGRIND + void *vg_ptr = VG_NOACCESS_READ(&sb->_vg_ptr); + if (vg_ptr == NULL) { + vg_ptr = state.map; + VG_NOACCESS_WRITE(&sb->_vg_ptr, vg_ptr); + VALGRIND_MEMPOOL_ALLOC(stream, vg_ptr, size); + } else { + void *state_end = state.map + state.alloc_size; + /* This only updates the mempool. The newly allocated chunk is still + * marked as NOACCESS. 
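
A hedged usage sketch of the stream allocator (the function below is hypothetical and assumes an already-initialized block pool; it is not part of this patch): individual allocations are never freed, and the whole chain of blocks returns to the block pool when the stream is finished.

#include <string.h>

#include "anv_private.h"

static void
fill_some_dynamic_state(struct anv_block_pool *dynamic_state_block_pool)
{
   struct anv_state_stream stream;
   anv_state_stream_init(&stream, dynamic_state_block_pool);

   /* One-shot, single-threaded allocations; nothing is freed individually. */
   struct anv_state color_calc = anv_state_stream_alloc(&stream, 192, 64);
   struct anv_state viewport   = anv_state_stream_alloc(&stream, 64, 32);
   memset(color_calc.map, 0, color_calc.alloc_size);
   memset(viewport.map, 0, viewport.alloc_size);

   /* Every block the stream pulled from the block pool is released here. */
   anv_state_stream_finish(&stream);
}
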
*/ + VALGRIND_MEMPOOL_CHANGE(stream, vg_ptr, vg_ptr, state_end - vg_ptr); + /* Mark the newly allocated chunk as undefined */ + VALGRIND_MAKE_MEM_UNDEFINED(state.map, state.alloc_size); + } +#endif + + stream->next = state.offset + size; + + return state; +} + +struct bo_pool_bo_link { + struct bo_pool_bo_link *next; + struct anv_bo bo; +}; + +void +anv_bo_pool_init(struct anv_bo_pool *pool, + struct anv_device *device, uint32_t bo_size) +{ + pool->device = device; + pool->bo_size = bo_size; + pool->free_list = NULL; + + VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false)); +} + +void +anv_bo_pool_finish(struct anv_bo_pool *pool) +{ + struct bo_pool_bo_link *link = PFL_PTR(pool->free_list); + while (link != NULL) { + struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link); + + anv_gem_munmap(link_copy.bo.map, pool->bo_size); + anv_gem_close(pool->device, link_copy.bo.gem_handle); + link = link_copy.next; + } + + VG(VALGRIND_DESTROY_MEMPOOL(pool)); +} + +VkResult +anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo) +{ + VkResult result; + + void *next_free_void; + if (anv_ptr_free_list_pop(&pool->free_list, &next_free_void)) { + struct bo_pool_bo_link *next_free = next_free_void; + *bo = VG_NOACCESS_READ(&next_free->bo); + assert(bo->map == next_free); + assert(bo->size == pool->bo_size); + + VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, pool->bo_size)); + + return VK_SUCCESS; + } + + struct anv_bo new_bo; + + result = anv_bo_init_new(&new_bo, pool->device, pool->bo_size); + if (result != VK_SUCCESS) + return result; + + assert(new_bo.size == pool->bo_size); + + new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pool->bo_size, 0); + if (new_bo.map == NULL) { + anv_gem_close(pool->device, new_bo.gem_handle); + return vk_error(VK_ERROR_MEMORY_MAP_FAILED); + } + + *bo = new_bo; + + VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, pool->bo_size)); + + return VK_SUCCESS; +} + +void +anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo) +{ + struct bo_pool_bo_link *link = bo->map; + link->bo = *bo; + + VG(VALGRIND_MEMPOOL_FREE(pool, bo->map)); + anv_ptr_free_list_push(&pool->free_list, link); +} diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c new file mode 100644 index 0000000..d24dd06 --- /dev/null +++ b/src/intel/vulkan/anv_batch_chain.c @@ -0,0 +1,1077 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#include "genxml/gen7_pack.h" +#include "genxml/gen8_pack.h" + +/** \file anv_batch_chain.c + * + * This file contains functions related to anv_cmd_buffer as a data + * structure. This involves everything required to create and destroy + * the actual batch buffers as well as link them together and handle + * relocations and surface state. It specifically does *not* contain any + * handling of actual vkCmd calls beyond vkCmdExecuteCommands. + */ + +/*-----------------------------------------------------------------------* + * Functions related to anv_reloc_list + *-----------------------------------------------------------------------*/ + +static VkResult +anv_reloc_list_init_clone(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc, + const struct anv_reloc_list *other_list) +{ + if (other_list) { + list->num_relocs = other_list->num_relocs; + list->array_length = other_list->array_length; + } else { + list->num_relocs = 0; + list->array_length = 256; + } + + list->relocs = + anv_alloc(alloc, list->array_length * sizeof(*list->relocs), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (list->relocs == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + list->reloc_bos = + anv_alloc(alloc, list->array_length * sizeof(*list->reloc_bos), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (list->reloc_bos == NULL) { + anv_free(alloc, list->relocs); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + if (other_list) { + memcpy(list->relocs, other_list->relocs, + list->array_length * sizeof(*list->relocs)); + memcpy(list->reloc_bos, other_list->reloc_bos, + list->array_length * sizeof(*list->reloc_bos)); + } + + return VK_SUCCESS; +} + +VkResult +anv_reloc_list_init(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc) +{ + return anv_reloc_list_init_clone(list, alloc, NULL); +} + +void +anv_reloc_list_finish(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc) +{ + anv_free(alloc, list->relocs); + anv_free(alloc, list->reloc_bos); +} + +static VkResult +anv_reloc_list_grow(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc, + size_t num_additional_relocs) +{ + if (list->num_relocs + num_additional_relocs <= list->array_length) + return VK_SUCCESS; + + size_t new_length = list->array_length * 2; + while (new_length < list->num_relocs + num_additional_relocs) + new_length *= 2; + + struct drm_i915_gem_relocation_entry *new_relocs = + anv_alloc(alloc, new_length * sizeof(*list->relocs), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (new_relocs == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct anv_bo **new_reloc_bos = + anv_alloc(alloc, new_length * sizeof(*list->reloc_bos), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (new_relocs == NULL) { + anv_free(alloc, new_relocs); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs)); + memcpy(new_reloc_bos, list->reloc_bos, + list->num_relocs * sizeof(*list->reloc_bos)); + + anv_free(alloc, list->relocs); + anv_free(alloc, list->reloc_bos); + + list->array_length = new_length; + list->relocs = new_relocs; + list->reloc_bos = new_reloc_bos; + + return VK_SUCCESS; +} + +uint64_t +anv_reloc_list_add(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc, + uint32_t offset, struct anv_bo *target_bo, uint32_t delta) +{ + struct drm_i915_gem_relocation_entry *entry; + int index; + + const uint32_t domain = + 
target_bo->is_winsys_bo ? I915_GEM_DOMAIN_RENDER : 0; + + anv_reloc_list_grow(list, alloc, 1); + /* TODO: Handle failure */ + + /* XXX: Can we use I915_EXEC_HANDLE_LUT? */ + index = list->num_relocs++; + list->reloc_bos[index] = target_bo; + entry = &list->relocs[index]; + entry->target_handle = target_bo->gem_handle; + entry->delta = delta; + entry->offset = offset; + entry->presumed_offset = target_bo->offset; + entry->read_domains = domain; + entry->write_domain = domain; + VG(VALGRIND_CHECK_MEM_IS_DEFINED(entry, sizeof(*entry))); + + return target_bo->offset + delta; +} + +static void +anv_reloc_list_append(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc, + struct anv_reloc_list *other, uint32_t offset) +{ + anv_reloc_list_grow(list, alloc, other->num_relocs); + /* TODO: Handle failure */ + + memcpy(&list->relocs[list->num_relocs], &other->relocs[0], + other->num_relocs * sizeof(other->relocs[0])); + memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0], + other->num_relocs * sizeof(other->reloc_bos[0])); + + for (uint32_t i = 0; i < other->num_relocs; i++) + list->relocs[i + list->num_relocs].offset += offset; + + list->num_relocs += other->num_relocs; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch + *-----------------------------------------------------------------------*/ + +void * +anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords) +{ + if (batch->next + num_dwords * 4 > batch->end) + batch->extend_cb(batch, batch->user_data); + + void *p = batch->next; + + batch->next += num_dwords * 4; + assert(batch->next <= batch->end); + + return p; +} + +uint64_t +anv_batch_emit_reloc(struct anv_batch *batch, + void *location, struct anv_bo *bo, uint32_t delta) +{ + return anv_reloc_list_add(batch->relocs, batch->alloc, + location - batch->start, bo, delta); +} + +void +anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) +{ + uint32_t size, offset; + + size = other->next - other->start; + assert(size % 4 == 0); + + if (batch->next + size > batch->end) + batch->extend_cb(batch, batch->user_data); + + assert(batch->next + size <= batch->end); + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size)); + memcpy(batch->next, other->start, size); + + offset = batch->next - batch->start; + anv_reloc_list_append(batch->relocs, batch->alloc, + other->relocs, offset); + + batch->next += size; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch_bo + *-----------------------------------------------------------------------*/ + +static VkResult +anv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer, + struct anv_batch_bo **bbo_out) +{ + VkResult result; + + struct anv_batch_bo *bbo = anv_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (bbo == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + if (result != VK_SUCCESS) + goto fail_alloc; + + result = anv_reloc_list_init(&bbo->relocs, &cmd_buffer->pool->alloc); + if (result != VK_SUCCESS) + goto fail_bo_alloc; + + *bbo_out = bbo; + + return VK_SUCCESS; + + fail_bo_alloc: + anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + fail_alloc: + anv_free(&cmd_buffer->pool->alloc, bbo); + + return result; +} + +static VkResult +anv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer, + const struct anv_batch_bo *other_bbo, + struct 
anv_batch_bo **bbo_out) +{ + VkResult result; + + struct anv_batch_bo *bbo = anv_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (bbo == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + if (result != VK_SUCCESS) + goto fail_alloc; + + result = anv_reloc_list_init_clone(&bbo->relocs, &cmd_buffer->pool->alloc, + &other_bbo->relocs); + if (result != VK_SUCCESS) + goto fail_bo_alloc; + + bbo->length = other_bbo->length; + memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length); + + bbo->last_ss_pool_bo_offset = other_bbo->last_ss_pool_bo_offset; + + *bbo_out = bbo; + + return VK_SUCCESS; + + fail_bo_alloc: + anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + fail_alloc: + anv_free(&cmd_buffer->pool->alloc, bbo); + + return result; +} + +static void +anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, + size_t batch_padding) +{ + batch->next = batch->start = bbo->bo.map; + batch->end = bbo->bo.map + bbo->bo.size - batch_padding; + batch->relocs = &bbo->relocs; + bbo->last_ss_pool_bo_offset = 0; + bbo->relocs.num_relocs = 0; +} + +static void +anv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch, + size_t batch_padding) +{ + batch->start = bbo->bo.map; + batch->next = bbo->bo.map + bbo->length; + batch->end = bbo->bo.map + bbo->bo.size - batch_padding; + batch->relocs = &bbo->relocs; +} + +static void +anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) +{ + assert(batch->start == bbo->bo.map); + bbo->length = batch->next - batch->start; + VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); +} + +static void +anv_batch_bo_destroy(struct anv_batch_bo *bbo, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_reloc_list_finish(&bbo->relocs, &cmd_buffer->pool->alloc); + anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + anv_free(&cmd_buffer->pool->alloc, bbo); +} + +static VkResult +anv_batch_bo_list_clone(const struct list_head *list, + struct anv_cmd_buffer *cmd_buffer, + struct list_head *new_list) +{ + VkResult result = VK_SUCCESS; + + list_inithead(new_list); + + struct anv_batch_bo *prev_bbo = NULL; + list_for_each_entry(struct anv_batch_bo, bbo, list, link) { + struct anv_batch_bo *new_bbo = NULL; + result = anv_batch_bo_clone(cmd_buffer, bbo, &new_bbo); + if (result != VK_SUCCESS) + break; + list_addtail(&new_bbo->link, new_list); + + if (prev_bbo) { + /* As we clone this list of batch_bo's, they chain one to the + * other using MI_BATCH_BUFFER_START commands. We need to fix up + * those relocations as we go. Fortunately, this is pretty easy + * as it will always be the last relocation in the list. 
+ */ + uint32_t last_idx = prev_bbo->relocs.num_relocs - 1; + assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo); + prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo; + } + + prev_bbo = new_bbo; + } + + if (result != VK_SUCCESS) { + list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link) + anv_batch_bo_destroy(bbo, cmd_buffer); + } + + return result; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch_bo + *-----------------------------------------------------------------------*/ + +static inline struct anv_batch_bo * +anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer) +{ + return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link); +} + +struct anv_address +anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer) +{ + return (struct anv_address) { + .bo = &cmd_buffer->device->surface_state_block_pool.bo, + .offset = *(int32_t *)anv_vector_head(&cmd_buffer->bt_blocks), + }; +} + +static void +emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, uint32_t offset) +{ + /* In gen8+ the address field grew to two dwords to accomodate 48 bit + * offsets. The high 16 bits are in the last dword, so we can use the gen8 + * version in either case, as long as we set the instruction length in the + * header accordingly. This means that we always emit three dwords here + * and all the padding and adjustment we do in this file works for all + * gens. + */ + + const uint32_t gen7_length = + GEN7_MI_BATCH_BUFFER_START_length - GEN7_MI_BATCH_BUFFER_START_length_bias; + const uint32_t gen8_length = + GEN8_MI_BATCH_BUFFER_START_length - GEN8_MI_BATCH_BUFFER_START_length_bias; + + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START, + .DWordLength = cmd_buffer->device->info.gen < 8 ? + gen7_length : gen8_length, + ._2ndLevelBatchBuffer = _1stlevelbatch, + .AddressSpaceIndicator = ASI_PPGTT, + .BatchBufferStartAddress = { bo, offset }); +} + +static void +cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_batch_bo *bbo) +{ + struct anv_batch *batch = &cmd_buffer->batch; + struct anv_batch_bo *current_bbo = + anv_cmd_buffer_current_batch_bo(cmd_buffer); + + /* We set the end of the batch a little short so we would be sure we + * have room for the chaining command. Since we're about to emit the + * chaining command, let's set it back where it should go. 
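
The chaining scheme discussed above reserves room for one MI_BATCH_BUFFER_START at the end of every batch BO and hands it back only when the jump to the next BO is emitted. With made-up numbers (a 16 KiB batch BO and a 3-dword, 12-byte MI_BATCH_BUFFER_START), the bookkeeping looks like this sketch:

#include <assert.h>
#include <stdio.h>

int main(void)
{
   /* Illustrative sizes; the real BO size comes from the batch BO pool. */
   const size_t bbs_bytes = 3 * 4;               /* MI_BATCH_BUFFER_START */
   char bo_map[16 * 1024];
   const size_t bo_size = sizeof(bo_map);

   char *start = bo_map;
   char *end   = bo_map + bo_size - bbs_bytes;   /* as in anv_batch_bo_start() */

   /* ...commands are packed between start and end... */

   /* When the batch fills up, the reserved tail is handed back so the
    * chaining command is guaranteed to fit in this BO. */
   end += bbs_bytes;                             /* as in cmd_buffer_chain_to_batch_bo() */
   assert(end == bo_map + bo_size);
   printf("bytes available before chaining: %zu\n", (size_t)(end - bbs_bytes - start));
   return 0;
}
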
+ */ + batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4; + assert(batch->end == current_bbo->bo.map + current_bbo->bo.size); + + emit_batch_buffer_start(cmd_buffer, &bbo->bo, 0); + + anv_batch_bo_finish(current_bbo, batch); +} + +static VkResult +anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) +{ + struct anv_cmd_buffer *cmd_buffer = _data; + struct anv_batch_bo *new_bbo; + + VkResult result = anv_batch_bo_create(cmd_buffer, &new_bbo); + if (result != VK_SUCCESS) + return result; + + struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos); + if (seen_bbo == NULL) { + anv_batch_bo_destroy(new_bbo, cmd_buffer); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + *seen_bbo = new_bbo; + + cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo); + + list_addtail(&new_bbo->link, &cmd_buffer->batch_bos); + + anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4); + + return VK_SUCCESS; +} + +struct anv_state +anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, + uint32_t entries, uint32_t *state_offset) +{ + struct anv_block_pool *block_pool = + &cmd_buffer->device->surface_state_block_pool; + int32_t *bt_block = anv_vector_head(&cmd_buffer->bt_blocks); + struct anv_state state; + + state.alloc_size = align_u32(entries * 4, 32); + + if (cmd_buffer->bt_next + state.alloc_size > block_pool->block_size) + return (struct anv_state) { 0 }; + + state.offset = cmd_buffer->bt_next; + state.map = block_pool->map + *bt_block + state.offset; + + cmd_buffer->bt_next += state.alloc_size; + + assert(*bt_block < 0); + *state_offset = -(*bt_block); + + return state; +} + +struct anv_state +anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer) +{ + return anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); +} + +struct anv_state +anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment) +{ + return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, + size, alignment); +} + +VkResult +anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_block_pool *block_pool = + &cmd_buffer->device->surface_state_block_pool; + + int32_t *offset = anv_vector_add(&cmd_buffer->bt_blocks); + if (offset == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + *offset = anv_block_pool_alloc_back(block_pool); + cmd_buffer->bt_next = 0; + + return VK_SUCCESS; +} + +VkResult +anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch_bo *batch_bo; + VkResult result; + + list_inithead(&cmd_buffer->batch_bos); + + result = anv_batch_bo_create(cmd_buffer, &batch_bo); + if (result != VK_SUCCESS) + return result; + + list_addtail(&batch_bo->link, &cmd_buffer->batch_bos); + + cmd_buffer->batch.alloc = &cmd_buffer->pool->alloc; + cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; + cmd_buffer->batch.user_data = cmd_buffer; + + anv_batch_bo_start(batch_bo, &cmd_buffer->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + int success = anv_vector_init(&cmd_buffer->seen_bbos, + sizeof(struct anv_bo *), + 8 * sizeof(struct anv_bo *)); + if (!success) + goto fail_batch_bo; + + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = batch_bo; + + success = anv_vector_init(&cmd_buffer->bt_blocks, sizeof(int32_t), + 8 * sizeof(int32_t)); + if (!success) + goto fail_seen_bbos; + + result = anv_reloc_list_init(&cmd_buffer->surface_relocs, + &cmd_buffer->pool->alloc); + if (result != VK_SUCCESS) + goto 
fail_bt_blocks; + + anv_cmd_buffer_new_binding_table_block(cmd_buffer); + + cmd_buffer->execbuf2.objects = NULL; + cmd_buffer->execbuf2.bos = NULL; + cmd_buffer->execbuf2.array_length = 0; + + return VK_SUCCESS; + + fail_bt_blocks: + anv_vector_finish(&cmd_buffer->bt_blocks); + fail_seen_bbos: + anv_vector_finish(&cmd_buffer->seen_bbos); + fail_batch_bo: + anv_batch_bo_destroy(batch_bo, cmd_buffer); + + return result; +} + +void +anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +{ + int32_t *bt_block; + anv_vector_foreach(bt_block, &cmd_buffer->bt_blocks) { + anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool, + *bt_block); + } + anv_vector_finish(&cmd_buffer->bt_blocks); + + anv_reloc_list_finish(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc); + + anv_vector_finish(&cmd_buffer->seen_bbos); + + /* Destroy all of the batch buffers */ + list_for_each_entry_safe(struct anv_batch_bo, bbo, + &cmd_buffer->batch_bos, link) { + anv_batch_bo_destroy(bbo, cmd_buffer); + } + + anv_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.objects); + anv_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.bos); +} + +void +anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +{ + /* Delete all but the first batch bo */ + assert(!list_empty(&cmd_buffer->batch_bos)); + while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) { + struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer); + list_del(&bbo->link); + anv_batch_bo_destroy(bbo, cmd_buffer); + } + assert(!list_empty(&cmd_buffer->batch_bos)); + + anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer), + &cmd_buffer->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + while (anv_vector_length(&cmd_buffer->bt_blocks) > 1) { + int32_t *bt_block = anv_vector_remove(&cmd_buffer->bt_blocks); + anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool, + *bt_block); + } + assert(anv_vector_length(&cmd_buffer->bt_blocks) == 1); + cmd_buffer->bt_next = 0; + + cmd_buffer->surface_relocs.num_relocs = 0; + + /* Reset the list of seen buffers */ + cmd_buffer->seen_bbos.head = 0; + cmd_buffer->seen_bbos.tail = 0; + + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = + anv_cmd_buffer_current_batch_bo(cmd_buffer); +} + +void +anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer); + + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { + /* When we start a batch buffer, we subtract a certain amount of + * padding from the end to ensure that we always have room to emit a + * BATCH_BUFFER_START to chain to the next BO. We need to remove + * that padding before we end the batch; otherwise, we may end up + * with our BATCH_BUFFER_END in another BO. + */ + cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4; + assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size); + + anv_batch_emit(&cmd_buffer->batch, GEN7_MI_BATCH_BUFFER_END); + + /* Round batch up to an even number of dwords. */ + if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4) + anv_batch_emit(&cmd_buffer->batch, GEN7_MI_NOOP); + + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY; + } + + anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); + + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { + /* If this is a secondary command buffer, we need to determine the + * mode in which it will be executed with vkExecuteCommands. 
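+       * The three modes handled below are EMIT (the primary copies the
+       * secondary's commands straight into its own batch), CHAIN (the
+       * secondary's last batch_bo is patched to jump back into the primary),
+       * and COPY_AND_CHAIN (the batch_bo list is cloned first because
+       * SIMULTANEOUS_USE forbids patching the original in place).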
We + * determine this statically here so that this stays in sync with the + * actual ExecuteCommands implementation. + */ + if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) && + (batch_bo->length < ANV_CMD_BUFFER_BATCH_SIZE / 2)) { + /* If the secondary has exactly one batch buffer in its list *and* + * that batch buffer is less than half of the maximum size, we're + * probably better of simply copying it into our batch. + */ + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT; + } else if (!(cmd_buffer->usage_flags & + VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) { + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN; + + /* When we chain, we need to add an MI_BATCH_BUFFER_START command + * with its relocation. In order to handle this we'll increment here + * so we can unconditionally decrement right before adding the + * MI_BATCH_BUFFER_START command. + */ + batch_bo->relocs.num_relocs++; + cmd_buffer->batch.next += GEN8_MI_BATCH_BUFFER_START_length * 4; + } else { + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN; + } + } +} + +static inline VkResult +anv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer *cmd_buffer, + struct list_head *list) +{ + list_for_each_entry(struct anv_batch_bo, bbo, list, link) { + struct anv_batch_bo **bbo_ptr = anv_vector_add(&cmd_buffer->seen_bbos); + if (bbo_ptr == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + *bbo_ptr = bbo; + } + + return VK_SUCCESS; +} + +void +anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, + struct anv_cmd_buffer *secondary) +{ + switch (secondary->exec_mode) { + case ANV_CMD_BUFFER_EXEC_MODE_EMIT: + anv_batch_emit_batch(&primary->batch, &secondary->batch); + anv_cmd_buffer_emit_state_base_address(primary); + break; + case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: { + struct anv_batch_bo *first_bbo = + list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link); + struct anv_batch_bo *last_bbo = + list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link); + + emit_batch_buffer_start(primary, &first_bbo->bo, 0); + + struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary); + assert(primary->batch.start == this_bbo->bo.map); + uint32_t offset = primary->batch.next - primary->batch.start; + const uint32_t inst_size = GEN8_MI_BATCH_BUFFER_START_length * 4; + + /* Roll back the previous MI_BATCH_BUFFER_START and its relocation so we + * can emit a new command and relocation for the current splice. In + * order to handle the initial-use case, we incremented next and + * num_relocs in end_batch_buffer() so we can alyways just subtract + * here. + */ + last_bbo->relocs.num_relocs--; + secondary->batch.next -= inst_size; + emit_batch_buffer_start(secondary, &this_bbo->bo, offset); + anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos); + + /* After patching up the secondary buffer, we need to clflush the + * modified instruction in case we're on a !llc platform. We use a + * little loop to handle the case where the instruction crosses a cache + * line boundary. 
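+       * (A gen8 MI_BATCH_BUFFER_START is only 3 dwords, so the patched range
+       * can straddle at most two cache lines; the mfence ahead of the loop
+       * orders the store to the instruction with respect to the clflushes.)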
+ */ + if (!primary->device->info.has_llc) { + void *inst = secondary->batch.next - inst_size; + void *p = (void *) (((uintptr_t) inst) & ~CACHELINE_MASK); + __builtin_ia32_mfence(); + while (p < secondary->batch.next) { + __builtin_ia32_clflush(p); + p += CACHELINE_SIZE; + } + } + + anv_cmd_buffer_emit_state_base_address(primary); + break; + } + case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: { + struct list_head copy_list; + VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos, + secondary, + ©_list); + if (result != VK_SUCCESS) + return; /* FIXME */ + + anv_cmd_buffer_add_seen_bbos(primary, ©_list); + + struct anv_batch_bo *first_bbo = + list_first_entry(©_list, struct anv_batch_bo, link); + struct anv_batch_bo *last_bbo = + list_last_entry(©_list, struct anv_batch_bo, link); + + cmd_buffer_chain_to_batch_bo(primary, first_bbo); + + list_splicetail(©_list, &primary->batch_bos); + + anv_batch_bo_continue(last_bbo, &primary->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + anv_cmd_buffer_emit_state_base_address(primary); + break; + } + default: + assert(!"Invalid execution mode"); + } + + anv_reloc_list_append(&primary->surface_relocs, &primary->pool->alloc, + &secondary->surface_relocs, 0); +} + +static VkResult +anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, + struct anv_reloc_list *relocs) +{ + struct drm_i915_gem_exec_object2 *obj = NULL; + + if (bo->index < cmd_buffer->execbuf2.bo_count && + cmd_buffer->execbuf2.bos[bo->index] == bo) + obj = &cmd_buffer->execbuf2.objects[bo->index]; + + if (obj == NULL) { + /* We've never seen this one before. Add it to the list and assign + * an id that we can use later. + */ + if (cmd_buffer->execbuf2.bo_count >= cmd_buffer->execbuf2.array_length) { + uint32_t new_len = cmd_buffer->execbuf2.objects ? + cmd_buffer->execbuf2.array_length * 2 : 64; + + struct drm_i915_gem_exec_object2 *new_objects = + anv_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_objects), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (new_objects == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct anv_bo **new_bos = + anv_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_bos), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (new_objects == NULL) { + anv_free(&cmd_buffer->pool->alloc, new_objects); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + if (cmd_buffer->execbuf2.objects) { + memcpy(new_objects, cmd_buffer->execbuf2.objects, + cmd_buffer->execbuf2.bo_count * sizeof(*new_objects)); + memcpy(new_bos, cmd_buffer->execbuf2.bos, + cmd_buffer->execbuf2.bo_count * sizeof(*new_bos)); + } + + cmd_buffer->execbuf2.objects = new_objects; + cmd_buffer->execbuf2.bos = new_bos; + cmd_buffer->execbuf2.array_length = new_len; + } + + assert(cmd_buffer->execbuf2.bo_count < cmd_buffer->execbuf2.array_length); + + bo->index = cmd_buffer->execbuf2.bo_count++; + obj = &cmd_buffer->execbuf2.objects[bo->index]; + cmd_buffer->execbuf2.bos[bo->index] = bo; + + obj->handle = bo->gem_handle; + obj->relocation_count = 0; + obj->relocs_ptr = 0; + obj->alignment = 0; + obj->offset = bo->offset; + obj->flags = bo->is_winsys_bo ? EXEC_OBJECT_WRITE : 0; + obj->rsvd1 = 0; + obj->rsvd2 = 0; + } + + if (relocs != NULL && obj->relocation_count == 0) { + /* This is the first time we've ever seen a list of relocations for + * this BO. Go ahead and set the relocations and then walk the list + * of relocations and add them all. 
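+       * Note that the recursive anv_cmd_buffer_add_bo() calls below pass NULL
+       * for the target's relocations, so they only ensure each target BO ends
+       * up on the validation list; the recursion never goes more than one
+       * level deep.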
+ */ + obj->relocation_count = relocs->num_relocs; + obj->relocs_ptr = (uintptr_t) relocs->relocs; + + for (size_t i = 0; i < relocs->num_relocs; i++) { + /* A quick sanity check on relocations */ + assert(relocs->relocs[i].offset < bo->size); + anv_cmd_buffer_add_bo(cmd_buffer, relocs->reloc_bos[i], NULL); + } + } + + return VK_SUCCESS; +} + +static void +anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, + struct anv_reloc_list *list) +{ + struct anv_bo *bo; + + /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in + * struct drm_i915_gem_exec_object2 against the bos current offset and if + * all bos haven't moved it will skip relocation processing alltogether. + * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming + * value of offset so we can set it either way. For that to work we need + * to make sure all relocs use the same presumed offset. + */ + + for (size_t i = 0; i < list->num_relocs; i++) { + bo = list->reloc_bos[i]; + if (bo->offset != list->relocs[i].presumed_offset) + cmd_buffer->execbuf2.need_reloc = true; + + list->relocs[i].target_handle = bo->index; + } +} + +static uint64_t +read_reloc(const struct anv_device *device, const void *p) +{ + if (device->info.gen >= 8) + return *(uint64_t *)p; + else + return *(uint32_t *)p; +} + +static void +write_reloc(const struct anv_device *device, void *p, uint64_t v) +{ + if (device->info.gen >= 8) + *(uint64_t *)p = v; + else + *(uint32_t *)p = v; +} + +static void +adjust_relocations_from_block_pool(struct anv_block_pool *pool, + struct anv_reloc_list *relocs) +{ + for (size_t i = 0; i < relocs->num_relocs; i++) { + /* In general, we don't know how stale the relocated value is. It + * may have been used last time or it may not. Since we don't want + * to stomp it while the GPU may be accessing it, we haven't updated + * it anywhere else in the code. Instead, we just set the presumed + * offset to what it is now based on the delta and the data in the + * block pool. Then the kernel will update it for us if needed. + */ + assert(relocs->relocs[i].offset < pool->state.end); + const void *p = pool->map + relocs->relocs[i].offset; + + /* We're reading back the relocated value from potentially incoherent + * memory here. However, any change to the value will be from the kernel + * writing out relocations, which will keep the CPU cache up to date. + */ + relocs->relocs[i].presumed_offset = + read_reloc(pool->device, p) - relocs->relocs[i].delta; + + /* All of the relocations from this block pool to other BO's should + * have been emitted relative to the surface block pool center. We + * need to add the center offset to make them relative to the + * beginning of the actual GEM bo. + */ + relocs->relocs[i].offset += pool->center_bo_offset; + } +} + +static void +adjust_relocations_to_block_pool(struct anv_block_pool *pool, + struct anv_bo *from_bo, + struct anv_reloc_list *relocs, + uint32_t *last_pool_center_bo_offset) +{ + assert(*last_pool_center_bo_offset <= pool->center_bo_offset); + uint32_t delta = pool->center_bo_offset - *last_pool_center_bo_offset; + + /* When we initially emit relocations into a block pool, we don't + * actually know what the final center_bo_offset will be so we just emit + * it as if center_bo_offset == 0. Now that we know what the center + * offset is, we need to walk the list of relocations and adjust any + * relocations that point to the pool bo with the correct offset. 
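+    * For example, a reloc emitted for a state object 256 bytes below the pool
+    * center starts out with delta == (uint32_t)-256; if the pool's
+    * center_bo_offset has since grown to, say, 4096, adding it gives 3840,
+    * the object's real offset from the start of the pool's GEM BO.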
+ */ + for (size_t i = 0; i < relocs->num_relocs; i++) { + if (relocs->reloc_bos[i] == &pool->bo) { + /* Adjust the delta value in the relocation to correctly + * correspond to the new delta. Initially, this value may have + * been negative (if treated as unsigned), but we trust in + * uint32_t roll-over to fix that for us at this point. + */ + relocs->relocs[i].delta += delta; + + /* Since the delta has changed, we need to update the actual + * relocated value with the new presumed value. This function + * should only be called on batch buffers, so we know it isn't in + * use by the GPU at the moment. + */ + assert(relocs->relocs[i].offset < from_bo->size); + write_reloc(pool->device, from_bo->map + relocs->relocs[i].offset, + relocs->relocs[i].presumed_offset + + relocs->relocs[i].delta); + } + } + + *last_pool_center_bo_offset = pool->center_bo_offset; +} + +void +anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch *batch = &cmd_buffer->batch; + struct anv_block_pool *ss_pool = + &cmd_buffer->device->surface_state_block_pool; + + cmd_buffer->execbuf2.bo_count = 0; + cmd_buffer->execbuf2.need_reloc = false; + + adjust_relocations_from_block_pool(ss_pool, &cmd_buffer->surface_relocs); + anv_cmd_buffer_add_bo(cmd_buffer, &ss_pool->bo, &cmd_buffer->surface_relocs); + + /* First, we walk over all of the bos we've seen and add them and their + * relocations to the validate list. + */ + struct anv_batch_bo **bbo; + anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) { + adjust_relocations_to_block_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs, + &(*bbo)->last_ss_pool_bo_offset); + + anv_cmd_buffer_add_bo(cmd_buffer, &(*bbo)->bo, &(*bbo)->relocs); + } + + struct anv_batch_bo *first_batch_bo = + list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link); + + /* The kernel requires that the last entry in the validation list be the + * batch buffer to execute. We can simply swap the element + * corresponding to the first batch_bo in the chain with the last + * element in the list. + */ + if (first_batch_bo->bo.index != cmd_buffer->execbuf2.bo_count - 1) { + uint32_t idx = first_batch_bo->bo.index; + uint32_t last_idx = cmd_buffer->execbuf2.bo_count - 1; + + struct drm_i915_gem_exec_object2 tmp_obj = + cmd_buffer->execbuf2.objects[idx]; + assert(cmd_buffer->execbuf2.bos[idx] == &first_batch_bo->bo); + + cmd_buffer->execbuf2.objects[idx] = cmd_buffer->execbuf2.objects[last_idx]; + cmd_buffer->execbuf2.bos[idx] = cmd_buffer->execbuf2.bos[last_idx]; + cmd_buffer->execbuf2.bos[idx]->index = idx; + + cmd_buffer->execbuf2.objects[last_idx] = tmp_obj; + cmd_buffer->execbuf2.bos[last_idx] = &first_batch_bo->bo; + first_batch_bo->bo.index = last_idx; + } + + /* Now we go through and fixup all of the relocation lists to point to + * the correct indices in the object array. We have to do this after we + * reorder the list above as some of the indices may have changed. 
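+    * (anv_cmd_buffer_process_relocs() writes bo->index into each reloc's
+    * target_handle; since we set I915_EXEC_HANDLE_LUT in the execbuf flags,
+    * the kernel treats target_handle as an index into the object array
+    * rather than as a GEM handle, so those indices have to be final.)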
+ */ + anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) + anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs); + + anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs); + + if (!cmd_buffer->device->info.has_llc) { + __builtin_ia32_mfence(); + anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) { + for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE) + __builtin_ia32_clflush((*bbo)->bo.map + i); + } + } + + cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) { + .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects, + .buffer_count = cmd_buffer->execbuf2.bo_count, + .batch_start_offset = 0, + .batch_len = batch->next - batch->start, + .cliprects_ptr = 0, + .num_cliprects = 0, + .DR1 = 0, + .DR4 = 0, + .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER | + I915_EXEC_CONSTANTS_REL_GENERAL, + .rsvd1 = cmd_buffer->device->context_id, + .rsvd2 = 0, + }; + + if (!cmd_buffer->execbuf2.need_reloc) + cmd_buffer->execbuf2.execbuf.flags |= I915_EXEC_NO_RELOC; +} diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c new file mode 100644 index 0000000..b060828 --- /dev/null +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -0,0 +1,1191 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +/** \file anv_cmd_buffer.c + * + * This file contains all of the stuff for emitting commands into a command + * buffer. This includes implementations of most of the vkCmd* + * entrypoints. This file is concerned entirely with state emission and + * not with the command buffer data structure itself. As far as this file + * is concerned, most of anv_cmd_buffer is magic. + */ + +/* TODO: These are taken from GLES. 
We should check the Vulkan spec */ +const struct anv_dynamic_state default_dynamic_state = { + .viewport = { + .count = 0, + }, + .scissor = { + .count = 0, + }, + .line_width = 1.0f, + .depth_bias = { + .bias = 0.0f, + .clamp = 0.0f, + .slope = 0.0f, + }, + .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f }, + .depth_bounds = { + .min = 0.0f, + .max = 1.0f, + }, + .stencil_compare_mask = { + .front = ~0u, + .back = ~0u, + }, + .stencil_write_mask = { + .front = ~0u, + .back = ~0u, + }, + .stencil_reference = { + .front = 0u, + .back = 0u, + }, +}; + +void +anv_dynamic_state_copy(struct anv_dynamic_state *dest, + const struct anv_dynamic_state *src, + uint32_t copy_mask) +{ + if (copy_mask & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { + dest->viewport.count = src->viewport.count; + typed_memcpy(dest->viewport.viewports, src->viewport.viewports, + src->viewport.count); + } + + if (copy_mask & (1 << VK_DYNAMIC_STATE_SCISSOR)) { + dest->scissor.count = src->scissor.count; + typed_memcpy(dest->scissor.scissors, src->scissor.scissors, + src->scissor.count); + } + + if (copy_mask & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) + dest->line_width = src->line_width; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) + dest->depth_bias = src->depth_bias; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) + typed_memcpy(dest->blend_constants, src->blend_constants, 4); + + if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) + dest->depth_bounds = src->depth_bounds; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) + dest->stencil_compare_mask = src->stencil_compare_mask; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) + dest->stencil_write_mask = src->stencil_write_mask; + + if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) + dest->stencil_reference = src->stencil_reference; +} + +static void +anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_cmd_state *state = &cmd_buffer->state; + + memset(&state->descriptors, 0, sizeof(state->descriptors)); + memset(&state->push_constants, 0, sizeof(state->push_constants)); + memset(state->binding_tables, 0, sizeof(state->binding_tables)); + memset(state->samplers, 0, sizeof(state->samplers)); + + /* 0 isn't a valid config. This ensures that we always configure L3$. */ + cmd_buffer->state.current_l3_config = 0; + + state->dirty = ~0; + state->vb_dirty = 0; + state->descriptors_dirty = 0; + state->push_constants_dirty = 0; + state->pipeline = NULL; + state->restart_index = UINT32_MAX; + state->dynamic = default_dynamic_state; + state->need_query_wa = true; + + if (state->attachments != NULL) { + anv_free(&cmd_buffer->pool->alloc, state->attachments); + state->attachments = NULL; + } + + state->gen7.index_buffer = NULL; +} + +/** + * Setup anv_cmd_state::attachments for vkCmdBeginRenderPass. 
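+ *
+ * For each attachment in the pass this records which aspects still need to
+ * be cleared (derived from the attachment's load ops) together with the
+ * matching clear value from pClearValues, so that later per-subpass code
+ * can perform the pending clears.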
+ */ +void +anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer, + const VkRenderPassBeginInfo *info) +{ + struct anv_cmd_state *state = &cmd_buffer->state; + ANV_FROM_HANDLE(anv_render_pass, pass, info->renderPass); + + anv_free(&cmd_buffer->pool->alloc, state->attachments); + + if (pass->attachment_count == 0) { + state->attachments = NULL; + return; + } + + state->attachments = anv_alloc(&cmd_buffer->pool->alloc, + pass->attachment_count * + sizeof(state->attachments[0]), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (state->attachments == NULL) { + /* FIXME: Propagate VK_ERROR_OUT_OF_HOST_MEMORY to vkEndCommandBuffer */ + abort(); + } + + for (uint32_t i = 0; i < pass->attachment_count; ++i) { + struct anv_render_pass_attachment *att = &pass->attachments[i]; + VkImageAspectFlags clear_aspects = 0; + + if (anv_format_is_color(att->format)) { + /* color attachment */ + if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT; + } + } else { + /* depthstencil attachment */ + if (att->format->has_depth && + att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; + } + if (att->format->has_stencil && + att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; + } + } + + state->attachments[i].pending_clear_aspects = clear_aspects; + if (clear_aspects) { + assert(info->clearValueCount > i); + state->attachments[i].clear_value = info->pClearValues[i]; + } + } +} + +static VkResult +anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage, uint32_t size) +{ + struct anv_push_constants **ptr = &cmd_buffer->state.push_constants[stage]; + + if (*ptr == NULL) { + *ptr = anv_alloc(&cmd_buffer->pool->alloc, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (*ptr == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } else if ((*ptr)->size < size) { + *ptr = anv_realloc(&cmd_buffer->pool->alloc, *ptr, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (*ptr == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + (*ptr)->size = size; + + return VK_SUCCESS; +} + +#define anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, field) \ + anv_cmd_buffer_ensure_push_constants_size(cmd_buffer, stage, \ + (offsetof(struct anv_push_constants, field) + \ + sizeof(cmd_buffer->state.push_constants[0]->field))) + +static VkResult anv_create_cmd_buffer( + struct anv_device * device, + struct anv_cmd_pool * pool, + VkCommandBufferLevel level, + VkCommandBuffer* pCommandBuffer) +{ + struct anv_cmd_buffer *cmd_buffer; + VkResult result; + + cmd_buffer = anv_alloc(&pool->alloc, sizeof(*cmd_buffer), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (cmd_buffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + cmd_buffer->device = device; + cmd_buffer->pool = pool; + cmd_buffer->level = level; + cmd_buffer->state.attachments = NULL; + + result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); + if (result != VK_SUCCESS) + goto fail; + + anv_state_stream_init(&cmd_buffer->surface_state_stream, + &device->surface_state_block_pool); + anv_state_stream_init(&cmd_buffer->dynamic_state_stream, + &device->dynamic_state_block_pool); + + if (pool) { + list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); + } else { + /* Init the pool_link so we can safefly call list_del when we destroy + * the command buffer + */ + list_inithead(&cmd_buffer->pool_link); + } + + 
*pCommandBuffer = anv_cmd_buffer_to_handle(cmd_buffer); + + return VK_SUCCESS; + + fail: + anv_free(&cmd_buffer->pool->alloc, cmd_buffer); + + return result; +} + +VkResult anv_AllocateCommandBuffers( + VkDevice _device, + const VkCommandBufferAllocateInfo* pAllocateInfo, + VkCommandBuffer* pCommandBuffers) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_pool, pool, pAllocateInfo->commandPool); + + VkResult result = VK_SUCCESS; + uint32_t i; + + for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { + result = anv_create_cmd_buffer(device, pool, pAllocateInfo->level, + &pCommandBuffers[i]); + if (result != VK_SUCCESS) + break; + } + + if (result != VK_SUCCESS) + anv_FreeCommandBuffers(_device, pAllocateInfo->commandPool, + i, pCommandBuffers); + + return result; +} + +static void +anv_cmd_buffer_destroy(struct anv_cmd_buffer *cmd_buffer) +{ + list_del(&cmd_buffer->pool_link); + + anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer); + + anv_state_stream_finish(&cmd_buffer->surface_state_stream); + anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); + + anv_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); + anv_free(&cmd_buffer->pool->alloc, cmd_buffer); +} + +void anv_FreeCommandBuffers( + VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer* pCommandBuffers) +{ + for (uint32_t i = 0; i < commandBufferCount; i++) { + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCommandBuffers[i]); + + anv_cmd_buffer_destroy(cmd_buffer); + } +} + +VkResult anv_ResetCommandBuffer( + VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->usage_flags = 0; + cmd_buffer->state.current_pipeline = UINT32_MAX; + anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); + anv_cmd_state_reset(cmd_buffer); + + return VK_SUCCESS; +} + +void +anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) +{ + switch (cmd_buffer->device->info.gen) { + case 7: + if (cmd_buffer->device->info.is_haswell) + return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); + else + return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); + case 8: + return gen8_cmd_buffer_emit_state_base_address(cmd_buffer); + case 9: + return gen9_cmd_buffer_emit_state_base_address(cmd_buffer); + default: + unreachable("unsupported gen\n"); + } +} + +VkResult anv_BeginCommandBuffer( + VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo* pBeginInfo) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + /* If this is the first vkBeginCommandBuffer, we must *initialize* the + * command buffer's state. Otherwise, we must *reset* its state. In both + * cases we reset it. + * + * From the Vulkan 1.0 spec: + * + * If a command buffer is in the executable state and the command buffer + * was allocated from a command pool with the + * VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT flag set, then + * vkBeginCommandBuffer implicitly resets the command buffer, behaving + * as if vkResetCommandBuffer had been called with + * VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT not set. It then puts + * the command buffer in the recording state. 
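+    *
+    * Calling anv_ResetCommandBuffer() unconditionally below covers both
+    * situations: a freshly allocated command buffer and one being implicitly
+    * reset.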
+ */ + anv_ResetCommandBuffer(commandBuffer, /*flags*/ 0); + + cmd_buffer->usage_flags = pBeginInfo->flags; + + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY || + !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)); + + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + + if (cmd_buffer->usage_flags & + VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) { + cmd_buffer->state.framebuffer = + anv_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer); + cmd_buffer->state.pass = + anv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass); + + struct anv_subpass *subpass = + &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass]; + + anv_cmd_buffer_set_subpass(cmd_buffer, subpass); + } + + return VK_SUCCESS; +} + +VkResult anv_EndCommandBuffer( + VkCommandBuffer commandBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_device *device = cmd_buffer->device; + + anv_cmd_buffer_end_batch_buffer(cmd_buffer); + + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { + /* The algorithm used to compute the validate list is not threadsafe as + * it uses the bo->index field. We have to lock the device around it. + * Fortunately, the chances for contention here are probably very low. + */ + pthread_mutex_lock(&device->mutex); + anv_cmd_buffer_prepare_execbuf(cmd_buffer); + pthread_mutex_unlock(&device->mutex); + } + + return VK_SUCCESS; +} + +void anv_CmdBindPipeline( + VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipeline _pipeline) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); + + switch (pipelineBindPoint) { + case VK_PIPELINE_BIND_POINT_COMPUTE: + cmd_buffer->state.compute_pipeline = pipeline; + cmd_buffer->state.compute_dirty |= ANV_CMD_DIRTY_PIPELINE; + cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; + break; + + case VK_PIPELINE_BIND_POINT_GRAPHICS: + cmd_buffer->state.pipeline = pipeline; + cmd_buffer->state.vb_dirty |= pipeline->vb_used; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE; + cmd_buffer->state.push_constants_dirty |= pipeline->active_stages; + cmd_buffer->state.descriptors_dirty |= pipeline->active_stages; + + /* Apply the dynamic state from the pipeline */ + cmd_buffer->state.dirty |= pipeline->dynamic_state_mask; + anv_dynamic_state_copy(&cmd_buffer->state.dynamic, + &pipeline->dynamic_state, + pipeline->dynamic_state_mask); + break; + + default: + assert(!"invalid bind point"); + break; + } +} + +void anv_CmdSetViewport( + VkCommandBuffer commandBuffer, + uint32_t firstViewport, + uint32_t viewportCount, + const VkViewport* pViewports) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + const uint32_t total_count = firstViewport + viewportCount; + if (cmd_buffer->state.dynamic.viewport.count < total_count); + cmd_buffer->state.dynamic.viewport.count = total_count; + + memcpy(cmd_buffer->state.dynamic.viewport.viewports + firstViewport, + pViewports, viewportCount * sizeof(*pViewports)); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_VIEWPORT; +} + +void anv_CmdSetScissor( + VkCommandBuffer commandBuffer, + uint32_t firstScissor, + uint32_t scissorCount, + const VkRect2D* pScissors) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + const uint32_t total_count = firstScissor + scissorCount; + if 
(cmd_buffer->state.dynamic.scissor.count < total_count); + cmd_buffer->state.dynamic.scissor.count = total_count; + + memcpy(cmd_buffer->state.dynamic.scissor.scissors + firstScissor, + pScissors, scissorCount * sizeof(*pScissors)); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_SCISSOR; +} + +void anv_CmdSetLineWidth( + VkCommandBuffer commandBuffer, + float lineWidth) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->state.dynamic.line_width = lineWidth; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH; +} + +void anv_CmdSetDepthBias( + VkCommandBuffer commandBuffer, + float depthBiasConstantFactor, + float depthBiasClamp, + float depthBiasSlopeFactor) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->state.dynamic.depth_bias.bias = depthBiasConstantFactor; + cmd_buffer->state.dynamic.depth_bias.clamp = depthBiasClamp; + cmd_buffer->state.dynamic.depth_bias.slope = depthBiasSlopeFactor; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; +} + +void anv_CmdSetBlendConstants( + VkCommandBuffer commandBuffer, + const float blendConstants[4]) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + memcpy(cmd_buffer->state.dynamic.blend_constants, + blendConstants, sizeof(float) * 4); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; +} + +void anv_CmdSetDepthBounds( + VkCommandBuffer commandBuffer, + float minDepthBounds, + float maxDepthBounds) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer->state.dynamic.depth_bounds.min = minDepthBounds; + cmd_buffer->state.dynamic.depth_bounds.max = maxDepthBounds; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS; +} + +void anv_CmdSetStencilCompareMask( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t compareMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd_buffer->state.dynamic.stencil_compare_mask.front = compareMask; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd_buffer->state.dynamic.stencil_compare_mask.back = compareMask; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; +} + +void anv_CmdSetStencilWriteMask( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t writeMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd_buffer->state.dynamic.stencil_write_mask.front = writeMask; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd_buffer->state.dynamic.stencil_write_mask.back = writeMask; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; +} + +void anv_CmdSetStencilReference( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t reference) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd_buffer->state.dynamic.stencil_reference.front = reference; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd_buffer->state.dynamic.stencil_reference.back = reference; + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; +} + +void anv_CmdBindDescriptorSets( + VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout _layout, + uint32_t firstSet, + uint32_t descriptorSetCount, + const VkDescriptorSet* pDescriptorSets, + uint32_t dynamicOffsetCount, + const uint32_t* pDynamicOffsets) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, 
cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); + struct anv_descriptor_set_layout *set_layout; + + assert(firstSet + descriptorSetCount < MAX_SETS); + + uint32_t dynamic_slot = 0; + for (uint32_t i = 0; i < descriptorSetCount; i++) { + ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); + set_layout = layout->set[firstSet + i].layout; + + if (cmd_buffer->state.descriptors[firstSet + i] != set) { + cmd_buffer->state.descriptors[firstSet + i] = set; + cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; + } + + if (set_layout->dynamic_offset_count > 0) { + anv_foreach_stage(s, set_layout->shader_stages) { + anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, s, dynamic); + + struct anv_push_constants *push = + cmd_buffer->state.push_constants[s]; + + unsigned d = layout->set[firstSet + i].dynamic_offset_start; + const uint32_t *offsets = pDynamicOffsets + dynamic_slot; + struct anv_descriptor *desc = set->descriptors; + + for (unsigned b = 0; b < set_layout->binding_count; b++) { + if (set_layout->binding[b].dynamic_offset_index < 0) + continue; + + unsigned array_size = set_layout->binding[b].array_size; + for (unsigned j = 0; j < array_size; j++) { + uint32_t range = 0; + if (desc->buffer_view) + range = desc->buffer_view->range; + push->dynamic[d].offset = *(offsets++); + push->dynamic[d].range = range; + desc++; + d++; + } + } + } + cmd_buffer->state.push_constants_dirty |= set_layout->shader_stages; + } + } +} + +void anv_CmdBindVertexBuffers( + VkCommandBuffer commandBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings; + + /* We have to defer setting up vertex buffer since we need the buffer + * stride from the pipeline. */ + + assert(firstBinding + bindingCount < MAX_VBS); + for (uint32_t i = 0; i < bindingCount; i++) { + vb[firstBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]); + vb[firstBinding + i].offset = pOffsets[i]; + cmd_buffer->state.vb_dirty |= 1 << (firstBinding + i); + } +} + +static void +add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer, + struct anv_state state, struct anv_bo *bo, uint32_t offset) +{ + /* The address goes in SURFACE_STATE dword 1 for gens < 8 and dwords 8 and + * 9 for gen8+. We only write the first dword for gen8+ here and rely on + * the initial state to set the high bits to 0. */ + + const uint32_t dword = cmd_buffer->device->info.gen < 8 ? 
1 : 8; + + anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc, + state.offset + dword * 4, bo, offset); +} + +const struct anv_format * +anv_format_for_descriptor_type(VkDescriptorType type) +{ + switch (type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + return anv_format_for_vk_format(VK_FORMAT_R32G32B32A32_SFLOAT); + + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + return anv_format_for_vk_format(VK_FORMAT_UNDEFINED); + + default: + unreachable("Invalid descriptor type"); + } +} + +VkResult +anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage, + struct anv_state *bt_state) +{ + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + struct anv_subpass *subpass = cmd_buffer->state.subpass; + struct anv_pipeline_bind_map *map; + uint32_t color_count, bias, state_offset; + + switch (stage) { + case MESA_SHADER_FRAGMENT: + map = &cmd_buffer->state.pipeline->bindings[stage]; + bias = MAX_RTS; + color_count = subpass->color_count; + break; + case MESA_SHADER_COMPUTE: + map = &cmd_buffer->state.compute_pipeline->bindings[stage]; + bias = 1; + color_count = 0; + break; + default: + map = &cmd_buffer->state.pipeline->bindings[stage]; + bias = 0; + color_count = 0; + break; + } + + if (color_count + map->surface_count == 0) { + *bt_state = (struct anv_state) { 0, }; + return VK_SUCCESS; + } + + *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, + bias + map->surface_count, + &state_offset); + uint32_t *bt_map = bt_state->map; + + if (bt_state->map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + for (uint32_t a = 0; a < color_count; a++) { + const struct anv_image_view *iview = + fb->attachments[subpass->color_attachments[a]]; + + assert(iview->color_rt_surface_state.alloc_size); + bt_map[a] = iview->color_rt_surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state, + iview->bo, iview->offset); + } + + if (stage == MESA_SHADER_COMPUTE && + cmd_buffer->state.compute_pipeline->cs_prog_data.uses_num_work_groups) { + struct anv_bo *bo = cmd_buffer->state.num_workgroups_bo; + uint32_t bo_offset = cmd_buffer->state.num_workgroups_offset; + + struct anv_state surface_state; + surface_state = + anv_cmd_buffer_alloc_surface_state(cmd_buffer); + + const struct anv_format *format = + anv_format_for_descriptor_type(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + anv_fill_buffer_surface_state(cmd_buffer->device, surface_state, + format->isl_format, bo_offset, 12, 1); + + bt_map[0] = surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset); + } + + if (map->surface_count == 0) + goto out; + + if (map->image_count > 0) { + VkResult result = + anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, images); + if (result != VK_SUCCESS) + return result; + + cmd_buffer->state.push_constants_dirty |= 1 << stage; + } + + uint32_t image = 0; + for (uint32_t s = 0; s < map->surface_count; s++) { + struct anv_pipeline_binding *binding = &map->surface_to_descriptor[s]; + struct anv_descriptor_set *set = + cmd_buffer->state.descriptors[binding->set]; + struct anv_descriptor *desc = &set->descriptors[binding->offset]; + + struct anv_state surface_state; + struct anv_bo *bo; + uint32_t bo_offset; + + switch (desc->type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + /* Nothing for us to do here */ + continue; + + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case 
VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + surface_state = desc->image_view->sampler_surface_state; + assert(surface_state.alloc_size); + bo = desc->image_view->bo; + bo_offset = desc->image_view->offset; + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: { + surface_state = desc->image_view->storage_surface_state; + assert(surface_state.alloc_size); + bo = desc->image_view->bo; + bo_offset = desc->image_view->offset; + + struct brw_image_param *image_param = + &cmd_buffer->state.push_constants[stage]->images[image++]; + + anv_image_view_fill_image_param(cmd_buffer->device, desc->image_view, + image_param); + image_param->surface_idx = bias + s; + break; + } + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + surface_state = desc->buffer_view->surface_state; + assert(surface_state.alloc_size); + bo = desc->buffer_view->bo; + bo_offset = desc->buffer_view->offset; + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + surface_state = desc->buffer_view->storage_surface_state; + assert(surface_state.alloc_size); + bo = desc->buffer_view->bo; + bo_offset = desc->buffer_view->offset; + + struct brw_image_param *image_param = + &cmd_buffer->state.push_constants[stage]->images[image++]; + + anv_buffer_view_fill_image_param(cmd_buffer->device, desc->buffer_view, + image_param); + image_param->surface_idx = bias + s; + break; + + default: + assert(!"Invalid descriptor type"); + continue; + } + + bt_map[bias + s] = surface_state.offset + state_offset; + add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset); + } + assert(image == map->image_count); + + out: + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(*bt_state); + + return VK_SUCCESS; +} + +VkResult +anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage, struct anv_state *state) +{ + struct anv_pipeline_bind_map *map; + + if (stage == MESA_SHADER_COMPUTE) + map = &cmd_buffer->state.compute_pipeline->bindings[stage]; + else + map = &cmd_buffer->state.pipeline->bindings[stage]; + + if (map->sampler_count == 0) { + *state = (struct anv_state) { 0, }; + return VK_SUCCESS; + } + + uint32_t size = map->sampler_count * 16; + *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32); + + if (state->map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + for (uint32_t s = 0; s < map->sampler_count; s++) { + struct anv_pipeline_binding *binding = &map->sampler_to_descriptor[s]; + struct anv_descriptor_set *set = + cmd_buffer->state.descriptors[binding->set]; + struct anv_descriptor *desc = &set->descriptors[binding->offset]; + + if (desc->type != VK_DESCRIPTOR_TYPE_SAMPLER && + desc->type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + continue; + + struct anv_sampler *sampler = desc->sampler; + + /* This can happen if we have an unfilled slot since TYPE_SAMPLER + * happens to be zero. 
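+       * (VK_DESCRIPTOR_TYPE_SAMPLER has the value 0, so a slot that was never
+       * written looks like a sampler descriptor with a NULL sampler pointer,
+       * which is why the NULL check below is sufficient.)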
+ */ + if (sampler == NULL) + continue; + + memcpy(state->map + (s * 16), + sampler->state, sizeof(sampler->state)); + } + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(*state); + + return VK_SUCCESS; +} + +struct anv_state +anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, + const void *data, uint32_t size, uint32_t alignment) +{ + struct anv_state state; + + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment); + memcpy(state.map, data, size); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size)); + + return state; +} + +struct anv_state +anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t *b, + uint32_t dwords, uint32_t alignment) +{ + struct anv_state state; + uint32_t *p; + + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + dwords * 4, alignment); + p = state.map; + for (uint32_t i = 0; i < dwords; i++) + p[i] = a[i] | b[i]; + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4)); + + return state; +} + +/** + * @brief Setup the command buffer for recording commands inside the given + * subpass. + * + * This does not record all commands needed for starting the subpass. + * Starting the subpass may require additional commands. + * + * Note that vkCmdBeginRenderPass, vkCmdNextSubpass, and vkBeginCommandBuffer + * with VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT, all setup the + * command buffer for recording commands for some subpass. But only the first + * two, vkCmdBeginRenderPass and vkCmdNextSubpass, can start a subpass. + */ +void +anv_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) +{ + switch (cmd_buffer->device->info.gen) { + case 7: + gen7_cmd_buffer_set_subpass(cmd_buffer, subpass); + break; + case 8: + gen8_cmd_buffer_set_subpass(cmd_buffer, subpass); + break; + case 9: + gen9_cmd_buffer_set_subpass(cmd_buffer, subpass); + break; + default: + unreachable("unsupported gen\n"); + } +} + +struct anv_state +anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage) +{ + struct anv_push_constants *data = + cmd_buffer->state.push_constants[stage]; + struct brw_stage_prog_data *prog_data = + cmd_buffer->state.pipeline->prog_data[stage]; + + /* If we don't actually have any push constants, bail. 
*/ + if (data == NULL || prog_data->nr_params == 0) + return (struct anv_state) { .offset = 0 }; + + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + prog_data->nr_params * sizeof(float), + 32 /* bottom 5 bits MBZ */); + + /* Walk through the param array and fill the buffer with data */ + uint32_t *u32_map = state.map; + for (unsigned i = 0; i < prog_data->nr_params; i++) { + uint32_t offset = (uintptr_t)prog_data->param[i]; + u32_map[i] = *(uint32_t *)((uint8_t *)data + offset); + } + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + return state; +} + +struct anv_state +anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_push_constants *data = + cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + + const unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; + const unsigned push_constant_data_size = + (local_id_dwords + prog_data->nr_params) * 4; + const unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); + const unsigned param_aligned_count = + reg_aligned_constant_size / sizeof(uint32_t); + + /* If we don't actually have any push constants, bail. */ + if (reg_aligned_constant_size == 0) + return (struct anv_state) { .offset = 0 }; + + const unsigned threads = pipeline->cs_thread_width_max; + const unsigned total_push_constants_size = + reg_aligned_constant_size * threads; + const unsigned push_constant_alignment = + cmd_buffer->device->info.gen < 8 ? 32 : 64; + const unsigned aligned_total_push_constants_size = + ALIGN(total_push_constants_size, push_constant_alignment); + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + aligned_total_push_constants_size, + push_constant_alignment); + + /* Walk through the param array and fill the buffer with data */ + uint32_t *u32_map = state.map; + + brw_cs_fill_local_id_payload(cs_prog_data, u32_map, threads, + reg_aligned_constant_size); + + /* Setup uniform data for the first thread */ + for (unsigned i = 0; i < prog_data->nr_params; i++) { + uint32_t offset = (uintptr_t)prog_data->param[i]; + u32_map[local_id_dwords + i] = *(uint32_t *)((uint8_t *)data + offset); + } + + /* Copy uniform data from the first thread to every other thread */ + const size_t uniform_data_size = prog_data->nr_params * sizeof(uint32_t); + for (unsigned t = 1; t < threads; t++) { + memcpy(&u32_map[t * param_aligned_count + local_id_dwords], + &u32_map[local_id_dwords], + uniform_data_size); + } + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + + return state; +} + +void anv_CmdPushConstants( + VkCommandBuffer commandBuffer, + VkPipelineLayout layout, + VkShaderStageFlags stageFlags, + uint32_t offset, + uint32_t size, + const void* pValues) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + anv_foreach_stage(stage, stageFlags) { + anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, client_data); + + memcpy(cmd_buffer->state.push_constants[stage]->client_data + offset, + pValues, size); + } + + cmd_buffer->state.push_constants_dirty |= stageFlags; +} + +void anv_CmdExecuteCommands( + VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer* pCmdBuffers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, primary, commandBuffer); + + 
assert(primary->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + + for (uint32_t i = 0; i < commandBufferCount; i++) { + ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); + + assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); + + anv_cmd_buffer_add_secondary(primary, secondary); + } +} + +VkResult anv_CreateCommandPool( + VkDevice _device, + const VkCommandPoolCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkCommandPool* pCmdPool) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_cmd_pool *pool; + + pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pool == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + if (pAllocator) + pool->alloc = *pAllocator; + else + pool->alloc = device->alloc; + + list_inithead(&pool->cmd_buffers); + + *pCmdPool = anv_cmd_pool_to_handle(pool); + + return VK_SUCCESS; +} + +void anv_DestroyCommandPool( + VkDevice _device, + VkCommandPool commandPool, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); + + anv_ResetCommandPool(_device, commandPool, 0); + + anv_free2(&device->alloc, pAllocator, pool); +} + +VkResult anv_ResetCommandPool( + VkDevice device, + VkCommandPool commandPool, + VkCommandPoolResetFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); + + /* FIXME: vkResetCommandPool must not destroy its command buffers. The + * Vulkan 1.0 spec requires that it only reset them: + * + * Resetting a command pool recycles all of the resources from all of + * the command buffers allocated from the command pool back to the + * command pool. All command buffers that have been allocated from the + * command pool are put in the initial state. + */ + list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer, + &pool->cmd_buffers, pool_link) { + anv_cmd_buffer_destroy(cmd_buffer); + } + + return VK_SUCCESS; +} + +/** + * Return NULL if the current subpass has no depthstencil attachment. + */ +const struct anv_image_view * +anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer) +{ + const struct anv_subpass *subpass = cmd_buffer->state.subpass; + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + + if (subpass->depth_stencil_attachment == VK_ATTACHMENT_UNUSED) + return NULL; + + const struct anv_image_view *iview = + fb->attachments[subpass->depth_stencil_attachment]; + + assert(iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + + return iview; +} diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c new file mode 100644 index 0000000..7a77336 --- /dev/null +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -0,0 +1,532 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +/* + * Descriptor set layouts. + */ + +VkResult anv_CreateDescriptorSetLayout( + VkDevice _device, + const VkDescriptorSetLayoutCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDescriptorSetLayout* pSetLayout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_descriptor_set_layout *set_layout; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); + + uint32_t max_binding = 0; + uint32_t immutable_sampler_count = 0; + for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { + max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding); + if (pCreateInfo->pBindings[j].pImmutableSamplers) + immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount; + } + + size_t size = sizeof(struct anv_descriptor_set_layout) + + (max_binding + 1) * sizeof(set_layout->binding[0]) + + immutable_sampler_count * sizeof(struct anv_sampler *); + + set_layout = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!set_layout) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* We just allocate all the samplers at the end of the struct */ + struct anv_sampler **samplers = + (struct anv_sampler **)&set_layout->binding[max_binding + 1]; + + set_layout->binding_count = max_binding + 1; + set_layout->shader_stages = 0; + set_layout->size = 0; + + for (uint32_t b = 0; b <= max_binding; b++) { + /* Initialize all binding_layout entries to -1 */ + memset(&set_layout->binding[b], -1, sizeof(set_layout->binding[b])); + + set_layout->binding[b].immutable_samplers = NULL; + } + + /* Initialize all samplers to 0 */ + memset(samplers, 0, immutable_sampler_count * sizeof(*samplers)); + + uint32_t sampler_count[MESA_SHADER_STAGES] = { 0, }; + uint32_t surface_count[MESA_SHADER_STAGES] = { 0, }; + uint32_t image_count[MESA_SHADER_STAGES] = { 0, }; + uint32_t buffer_count = 0; + uint32_t dynamic_offset_count = 0; + + for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { + const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[j]; + uint32_t b = binding->binding; + + assert(binding->descriptorCount > 0); + set_layout->binding[b].array_size = binding->descriptorCount; + set_layout->binding[b].descriptor_index = set_layout->size; + set_layout->size += binding->descriptorCount; + + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + anv_foreach_stage(s, binding->stageFlags) { + set_layout->binding[b].stage[s].sampler_index = sampler_count[s]; + sampler_count[s] += binding->descriptorCount; + } + break; + default: + break; + } + + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + set_layout->binding[b].buffer_index = buffer_count; + buffer_count += binding->descriptorCount; + /* 
fall through */ + + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + anv_foreach_stage(s, binding->stageFlags) { + set_layout->binding[b].stage[s].surface_index = surface_count[s]; + surface_count[s] += binding->descriptorCount; + } + break; + default: + break; + } + + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + set_layout->binding[b].dynamic_offset_index = dynamic_offset_count; + dynamic_offset_count += binding->descriptorCount; + break; + default: + break; + } + + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + anv_foreach_stage(s, binding->stageFlags) { + set_layout->binding[b].stage[s].image_index = image_count[s]; + image_count[s] += binding->descriptorCount; + } + break; + default: + break; + } + + if (binding->pImmutableSamplers) { + set_layout->binding[b].immutable_samplers = samplers; + samplers += binding->descriptorCount; + + for (uint32_t i = 0; i < binding->descriptorCount; i++) + set_layout->binding[b].immutable_samplers[i] = + anv_sampler_from_handle(binding->pImmutableSamplers[i]); + } else { + set_layout->binding[b].immutable_samplers = NULL; + } + + set_layout->shader_stages |= binding->stageFlags; + } + + set_layout->buffer_count = buffer_count; + set_layout->dynamic_offset_count = dynamic_offset_count; + + *pSetLayout = anv_descriptor_set_layout_to_handle(set_layout); + + return VK_SUCCESS; +} + +void anv_DestroyDescriptorSetLayout( + VkDevice _device, + VkDescriptorSetLayout _set_layout, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, _set_layout); + + anv_free2(&device->alloc, pAllocator, set_layout); +} + +/* + * Pipeline layouts. These have nothing to do with the pipeline. 
They are
+ * just multiple descriptor set layouts pasted together
+ */
+
+VkResult anv_CreatePipelineLayout(
+ VkDevice _device,
+ const VkPipelineLayoutCreateInfo* pCreateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ VkPipelineLayout* pPipelineLayout)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ struct anv_pipeline_layout *layout;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO);
+
+ layout = anv_alloc2(&device->alloc, pAllocator, sizeof(*layout), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (layout == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ layout->num_sets = pCreateInfo->setLayoutCount;
+
+ unsigned dynamic_offset_count = 0;
+
+ memset(layout->stage, 0, sizeof(layout->stage));
+ for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) {
+ ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout,
+ pCreateInfo->pSetLayouts[set]);
+ layout->set[set].layout = set_layout;
+
+ layout->set[set].dynamic_offset_start = dynamic_offset_count;
+ for (uint32_t b = 0; b < set_layout->binding_count; b++) {
+ if (set_layout->binding[b].dynamic_offset_index >= 0)
+ dynamic_offset_count += set_layout->binding[b].array_size;
+ for (gl_shader_stage s = 0; s < MESA_SHADER_STAGES; s++) {
+ if (set_layout->binding[b].stage[s].surface_index >= 0)
+ layout->stage[s].has_dynamic_offsets = true;
+ }
+ }
+ }
+
+ *pPipelineLayout = anv_pipeline_layout_to_handle(layout);
+
+ return VK_SUCCESS;
+}
+
+void anv_DestroyPipelineLayout(
+ VkDevice _device,
+ VkPipelineLayout _pipelineLayout,
+ const VkAllocationCallbacks* pAllocator)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, _pipelineLayout);
+
+ anv_free2(&device->alloc, pAllocator, pipeline_layout);
+}
+
+/*
+ * Descriptor pools. These are a no-op for now.
+ */
+
+VkResult anv_CreateDescriptorPool(
+ VkDevice device,
+ const VkDescriptorPoolCreateInfo* pCreateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ VkDescriptorPool* pDescriptorPool)
+{
+ anv_finishme("VkDescriptorPool is a stub");
+ *pDescriptorPool = (VkDescriptorPool)1;
+ return VK_SUCCESS;
+}
+
+void anv_DestroyDescriptorPool(
+ VkDevice _device,
+ VkDescriptorPool _pool,
+ const VkAllocationCallbacks* pAllocator)
+{
+ anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets");
+}
+
+VkResult anv_ResetDescriptorPool(
+ VkDevice device,
+ VkDescriptorPool descriptorPool,
+ VkDescriptorPoolResetFlags flags)
+{
+ anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets");
+ return VK_SUCCESS;
+}
+
+VkResult
+anv_descriptor_set_create(struct anv_device *device,
+ const struct anv_descriptor_set_layout *layout,
+ struct anv_descriptor_set **out_set)
+{
+ struct anv_descriptor_set *set;
+ size_t size = sizeof(*set) + layout->size * sizeof(set->descriptors[0]);
+
+ set = anv_alloc(&device->alloc /* XXX: Use the pool */, size, 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!set)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ /* A descriptor set may not be 100% filled. Clear the set so we can
+ * later detect holes in it.
+ */ + memset(set, 0, size); + + set->layout = layout; + + /* Go through and fill out immutable samplers if we have any */ + struct anv_descriptor *desc = set->descriptors; + for (uint32_t b = 0; b < layout->binding_count; b++) { + if (layout->binding[b].immutable_samplers) { + for (uint32_t i = 0; i < layout->binding[b].array_size; i++) + desc[i].sampler = layout->binding[b].immutable_samplers[i]; + } + desc += layout->binding[b].array_size; + } + + /* XXX: Use the pool */ + set->buffer_views = + anv_alloc(&device->alloc, + sizeof(set->buffer_views[0]) * layout->buffer_count, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!set->buffer_views) { + anv_free(&device->alloc, set); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + for (uint32_t b = 0; b < layout->buffer_count; b++) { + set->buffer_views[b].surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } + set->buffer_count = layout->buffer_count; + *out_set = set; + + return VK_SUCCESS; +} + +void +anv_descriptor_set_destroy(struct anv_device *device, + struct anv_descriptor_set *set) +{ + /* XXX: Use the pool */ + for (uint32_t b = 0; b < set->buffer_count; b++) + anv_state_pool_free(&device->surface_state_pool, + set->buffer_views[b].surface_state); + + anv_free(&device->alloc, set->buffer_views); + anv_free(&device->alloc, set); +} + +VkResult anv_AllocateDescriptorSets( + VkDevice _device, + const VkDescriptorSetAllocateInfo* pAllocateInfo, + VkDescriptorSet* pDescriptorSets) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + VkResult result = VK_SUCCESS; + struct anv_descriptor_set *set; + uint32_t i; + + for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) { + ANV_FROM_HANDLE(anv_descriptor_set_layout, layout, + pAllocateInfo->pSetLayouts[i]); + + result = anv_descriptor_set_create(device, layout, &set); + if (result != VK_SUCCESS) + break; + + pDescriptorSets[i] = anv_descriptor_set_to_handle(set); + } + + if (result != VK_SUCCESS) + anv_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool, + i, pDescriptorSets); + + return result; +} + +VkResult anv_FreeDescriptorSets( + VkDevice _device, + VkDescriptorPool descriptorPool, + uint32_t count, + const VkDescriptorSet* pDescriptorSets) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + for (uint32_t i = 0; i < count; i++) { + ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); + + anv_descriptor_set_destroy(device, set); + } + + return VK_SUCCESS; +} + +void anv_UpdateDescriptorSets( + VkDevice _device, + uint32_t descriptorWriteCount, + const VkWriteDescriptorSet* pDescriptorWrites, + uint32_t descriptorCopyCount, + const VkCopyDescriptorSet* pDescriptorCopies) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + for (uint32_t i = 0; i < descriptorWriteCount; i++) { + const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; + ANV_FROM_HANDLE(anv_descriptor_set, set, write->dstSet); + const struct anv_descriptor_set_binding_layout *bind_layout = + &set->layout->binding[write->dstBinding]; + struct anv_descriptor *desc = + &set->descriptors[bind_layout->descriptor_index]; + desc += write->dstArrayElement; + + switch (write->descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + ANV_FROM_HANDLE(anv_sampler, sampler, + write->pImageInfo[j].sampler); + + desc[j] = (struct anv_descriptor) { + .type = VK_DESCRIPTOR_TYPE_SAMPLER, + .sampler = sampler, + }; + } + break; + + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + for (uint32_t j = 0; j < write->descriptorCount; 
j++) { + ANV_FROM_HANDLE(anv_image_view, iview, + write->pImageInfo[j].imageView); + ANV_FROM_HANDLE(anv_sampler, sampler, + write->pImageInfo[j].sampler); + + desc[j].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + desc[j].image_view = iview; + + /* If this descriptor has an immutable sampler, we don't want + * to stomp on it. + */ + if (sampler) + desc[j].sampler = sampler; + } + break; + + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + ANV_FROM_HANDLE(anv_image_view, iview, + write->pImageInfo[j].imageView); + + desc[j] = (struct anv_descriptor) { + .type = write->descriptorType, + .image_view = iview, + }; + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + ANV_FROM_HANDLE(anv_buffer_view, bview, + write->pTexelBufferView[j]); + + desc[j] = (struct anv_descriptor) { + .type = write->descriptorType, + .buffer_view = bview, + }; + } + break; + + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + anv_finishme("input attachments not implemented"); + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + assert(write->pBufferInfo[j].buffer); + ANV_FROM_HANDLE(anv_buffer, buffer, write->pBufferInfo[j].buffer); + assert(buffer); + + struct anv_buffer_view *view = + &set->buffer_views[bind_layout->buffer_index]; + view += write->dstArrayElement + j; + + const struct anv_format *format = + anv_format_for_descriptor_type(write->descriptorType); + + view->format = format->isl_format; + view->bo = buffer->bo; + view->offset = buffer->offset + write->pBufferInfo[j].offset; + + /* For buffers with dynamic offsets, we use the full possible + * range in the surface state and do the actual range-checking + * in the shader. 
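+ * (The dynamic offset itself is only provided later, via
+ * vkCmdBindDescriptorSets(), so the requested range cannot be baked
+ * into the surface state at vkUpdateDescriptorSets() time.)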
+ */ + if (bind_layout->dynamic_offset_index >= 0 || + write->pBufferInfo[j].range == VK_WHOLE_SIZE) + view->range = buffer->size - write->pBufferInfo[j].offset; + else + view->range = write->pBufferInfo[j].range; + + anv_fill_buffer_surface_state(device, view->surface_state, + view->format, + view->offset, view->range, 1); + + desc[j] = (struct anv_descriptor) { + .type = write->descriptorType, + .buffer_view = view, + }; + + } + + default: + break; + } + } + + for (uint32_t i = 0; i < descriptorCopyCount; i++) { + const VkCopyDescriptorSet *copy = &pDescriptorCopies[i]; + ANV_FROM_HANDLE(anv_descriptor_set, src, copy->dstSet); + ANV_FROM_HANDLE(anv_descriptor_set, dst, copy->dstSet); + + const struct anv_descriptor_set_binding_layout *src_layout = + &src->layout->binding[copy->srcBinding]; + struct anv_descriptor *src_desc = + &src->descriptors[src_layout->descriptor_index]; + src_desc += copy->srcArrayElement; + + const struct anv_descriptor_set_binding_layout *dst_layout = + &dst->layout->binding[copy->dstBinding]; + struct anv_descriptor *dst_desc = + &dst->descriptors[dst_layout->descriptor_index]; + dst_desc += copy->dstArrayElement; + + for (uint32_t j = 0; j < copy->descriptorCount; j++) + dst_desc[j] = src_desc[j]; + } +} diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c new file mode 100644 index 0000000..a8835f7 --- /dev/null +++ b/src/intel/vulkan/anv_device.c @@ -0,0 +1,1789 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" +#include "mesa/main/git_sha1.h" +#include "util/strtod.h" +#include "util/debug.h" + +#include "genxml/gen7_pack.h" + +struct anv_dispatch_table dtable; + +static void +compiler_debug_log(void *data, const char *fmt, ...) +{ } + +static void +compiler_perf_log(void *data, const char *fmt, ...) 
+{ + va_list args; + va_start(args, fmt); + + if (unlikely(INTEL_DEBUG & DEBUG_PERF)) + vfprintf(stderr, fmt, args); + + va_end(args); +} + +static VkResult +anv_physical_device_init(struct anv_physical_device *device, + struct anv_instance *instance, + const char *path) +{ + VkResult result; + int fd; + + fd = open(path, O_RDWR | O_CLOEXEC); + if (fd < 0) + return vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "failed to open %s: %m", path); + + device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + device->instance = instance; + device->path = path; + + device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID); + if (!device->chipset_id) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "failed to get chipset id: %m"); + goto fail; + } + + device->name = brw_get_device_name(device->chipset_id); + device->info = brw_get_device_info(device->chipset_id); + if (!device->info) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "failed to get device info"); + goto fail; + } + + if (device->info->is_haswell) { + fprintf(stderr, "WARNING: Haswell Vulkan support is incomplete\n"); + } else if (device->info->gen == 7 && !device->info->is_baytrail) { + fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n"); + } else if (device->info->gen == 7 && device->info->is_baytrail) { + fprintf(stderr, "WARNING: Bay Trail Vulkan support is incomplete\n"); + } else if (device->info->gen >= 8) { + /* Broadwell, Cherryview, Skylake, Broxton, Kabylake is as fully + * supported as anything */ + } else { + result = vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, + "Vulkan not yet supported on %s", device->name); + goto fail; + } + + if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "failed to get aperture size: %m"); + goto fail; + } + + if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "kernel missing gem wait"); + goto fail; + } + + if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "kernel missing execbuf2"); + goto fail; + } + + if (!device->info->has_llc && + anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) { + result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + "kernel missing wc mmap"); + goto fail; + } + + bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X); + + close(fd); + + brw_process_intel_debug_variable(); + + device->compiler = brw_compiler_create(NULL, device->info); + if (device->compiler == NULL) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail; + } + device->compiler->shader_debug_log = compiler_debug_log; + device->compiler->shader_perf_log = compiler_perf_log; + + /* XXX: Actually detect bit6 swizzling */ + isl_device_init(&device->isl_dev, device->info, swizzled); + + return VK_SUCCESS; + +fail: + close(fd); + return result; +} + +static void +anv_physical_device_finish(struct anv_physical_device *device) +{ + ralloc_free(device->compiler); +} + +static const VkExtensionProperties global_extensions[] = { + { + .extensionName = VK_KHR_SURFACE_EXTENSION_NAME, + .specVersion = 25, + }, + { + .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME, + .specVersion = 5, + }, +#ifdef HAVE_WAYLAND_PLATFORM + { + .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, + .specVersion = 4, + }, +#endif +}; + +static const VkExtensionProperties device_extensions[] = { + { + .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME, + .specVersion = 67, + }, +}; + +static void * 
+default_alloc_func(void *pUserData, size_t size, size_t align, + VkSystemAllocationScope allocationScope) +{ + return malloc(size); +} + +static void * +default_realloc_func(void *pUserData, void *pOriginal, size_t size, + size_t align, VkSystemAllocationScope allocationScope) +{ + return realloc(pOriginal, size); +} + +static void +default_free_func(void *pUserData, void *pMemory) +{ + free(pMemory); +} + +static const VkAllocationCallbacks default_alloc = { + .pUserData = NULL, + .pfnAllocation = default_alloc_func, + .pfnReallocation = default_realloc_func, + .pfnFree = default_free_func, +}; + +VkResult anv_CreateInstance( + const VkInstanceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkInstance* pInstance) +{ + struct anv_instance *instance; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); + + uint32_t client_version = pCreateInfo->pApplicationInfo ? + pCreateInfo->pApplicationInfo->apiVersion : + VK_MAKE_VERSION(1, 0, 0); + if (VK_MAKE_VERSION(1, 0, 0) > client_version || + client_version > VK_MAKE_VERSION(1, 0, 3)) { + return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, + "Client requested version %d.%d.%d", + VK_VERSION_MAJOR(client_version), + VK_VERSION_MINOR(client_version), + VK_VERSION_PATCH(client_version)); + } + + for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { + bool found = false; + for (uint32_t j = 0; j < ARRAY_SIZE(global_extensions); j++) { + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + global_extensions[j].extensionName) == 0) { + found = true; + break; + } + } + if (!found) + return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); + } + + instance = anv_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!instance) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + + if (pAllocator) + instance->alloc = *pAllocator; + else + instance->alloc = default_alloc; + + instance->apiVersion = client_version; + instance->physicalDeviceCount = -1; + + _mesa_locale_init(); + + VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); + + anv_init_wsi(instance); + + *pInstance = anv_instance_to_handle(instance); + + return VK_SUCCESS; +} + +void anv_DestroyInstance( + VkInstance _instance, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + + if (instance->physicalDeviceCount > 0) { + /* We support at most one physical device. */ + assert(instance->physicalDeviceCount == 1); + anv_physical_device_finish(&instance->physicalDevice); + } + + anv_finish_wsi(instance); + + VG(VALGRIND_DESTROY_MEMPOOL(instance)); + + _mesa_locale_fini(); + + anv_free(&instance->alloc, instance); +} + +VkResult anv_EnumeratePhysicalDevices( + VkInstance _instance, + uint32_t* pPhysicalDeviceCount, + VkPhysicalDevice* pPhysicalDevices) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + VkResult result; + + if (instance->physicalDeviceCount < 0) { + result = anv_physical_device_init(&instance->physicalDevice, + instance, "/dev/dri/renderD128"); + if (result == VK_ERROR_INCOMPATIBLE_DRIVER) { + instance->physicalDeviceCount = 0; + } else if (result == VK_SUCCESS) { + instance->physicalDeviceCount = 1; + } else { + return result; + } + } + + /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL; + * otherwise it's an inout parameter. 
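+ * This is the usual two-call idiom; a purely illustrative
+ * application-side sketch (not driver code) would be:
+ *
+ *    uint32_t count = 0;
+ *    vkEnumeratePhysicalDevices(instance, &count, NULL);
+ *    VkPhysicalDevice *devices = malloc(count * sizeof(*devices));
+ *    vkEnumeratePhysicalDevices(instance, &count, devices);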
+ * + * The Vulkan spec (git aaed022) says: + * + * pPhysicalDeviceCount is a pointer to an unsigned integer variable + * that is initialized with the number of devices the application is + * prepared to receive handles to. pname:pPhysicalDevices is pointer to + * an array of at least this many VkPhysicalDevice handles [...]. + * + * Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices + * overwrites the contents of the variable pointed to by + * pPhysicalDeviceCount with the number of physical devices in in the + * instance; otherwise, vkEnumeratePhysicalDevices overwrites + * pPhysicalDeviceCount with the number of physical handles written to + * pPhysicalDevices. + */ + if (!pPhysicalDevices) { + *pPhysicalDeviceCount = instance->physicalDeviceCount; + } else if (*pPhysicalDeviceCount >= 1) { + pPhysicalDevices[0] = anv_physical_device_to_handle(&instance->physicalDevice); + *pPhysicalDeviceCount = 1; + } else { + *pPhysicalDeviceCount = 0; + } + + return VK_SUCCESS; +} + +void anv_GetPhysicalDeviceFeatures( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures* pFeatures) +{ + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); + + *pFeatures = (VkPhysicalDeviceFeatures) { + .robustBufferAccess = true, + .fullDrawIndexUint32 = true, + .imageCubeArray = false, + .independentBlend = pdevice->info->gen >= 8, + .geometryShader = true, + .tessellationShader = false, + .sampleRateShading = false, + .dualSrcBlend = true, + .logicOp = true, + .multiDrawIndirect = false, + .drawIndirectFirstInstance = false, + .depthClamp = false, + .depthBiasClamp = false, + .fillModeNonSolid = true, + .depthBounds = false, + .wideLines = true, + .largePoints = true, + .alphaToOne = true, + .multiViewport = true, + .samplerAnisotropy = false, /* FINISHME */ + .textureCompressionETC2 = true, + .textureCompressionASTC_LDR = true, + .textureCompressionBC = true, + .occlusionQueryPrecise = true, + .pipelineStatisticsQuery = true, + .vertexPipelineStoresAndAtomics = pdevice->info->gen >= 8, + .fragmentStoresAndAtomics = true, + .shaderTessellationAndGeometryPointSize = true, + .shaderImageGatherExtended = true, + .shaderStorageImageExtendedFormats = false, + .shaderStorageImageMultisample = false, + .shaderUniformBufferArrayDynamicIndexing = true, + .shaderSampledImageArrayDynamicIndexing = true, + .shaderStorageBufferArrayDynamicIndexing = true, + .shaderStorageImageArrayDynamicIndexing = true, + .shaderStorageImageReadWithoutFormat = false, + .shaderStorageImageWriteWithoutFormat = true, + .shaderClipDistance = false, + .shaderCullDistance = false, + .shaderFloat64 = false, + .shaderInt64 = false, + .shaderInt16 = false, + .alphaToOne = true, + .variableMultisampleRate = false, + .inheritedQueries = false, + }; +} + +void +anv_device_get_cache_uuid(void *uuid) +{ + memset(uuid, 0, VK_UUID_SIZE); + snprintf(uuid, VK_UUID_SIZE, "anv-%s", MESA_GIT_SHA1 + 4); +} + +void anv_GetPhysicalDeviceProperties( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceProperties* pProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); + const struct brw_device_info *devinfo = pdevice->info; + + anv_finishme("Get correct values for VkPhysicalDeviceLimits"); + + const float time_stamp_base = devinfo->gen >= 9 ? 
83.333 : 80.0; + + VkSampleCountFlags sample_counts = + isl_device_get_sample_counts(&pdevice->isl_dev); + + VkPhysicalDeviceLimits limits = { + .maxImageDimension1D = (1 << 14), + .maxImageDimension2D = (1 << 14), + .maxImageDimension3D = (1 << 10), + .maxImageDimensionCube = (1 << 14), + .maxImageArrayLayers = (1 << 10), + .maxTexelBufferElements = 128 * 1024 * 1024, + .maxUniformBufferRange = UINT32_MAX, + .maxStorageBufferRange = UINT32_MAX, + .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE, + .maxMemoryAllocationCount = UINT32_MAX, + .maxSamplerAllocationCount = 64 * 1024, + .bufferImageGranularity = 64, /* A cache line */ + .sparseAddressSpaceSize = 0, + .maxBoundDescriptorSets = MAX_SETS, + .maxPerStageDescriptorSamplers = 64, + .maxPerStageDescriptorUniformBuffers = 64, + .maxPerStageDescriptorStorageBuffers = 64, + .maxPerStageDescriptorSampledImages = 64, + .maxPerStageDescriptorStorageImages = 64, + .maxPerStageDescriptorInputAttachments = 64, + .maxPerStageResources = 128, + .maxDescriptorSetSamplers = 256, + .maxDescriptorSetUniformBuffers = 256, + .maxDescriptorSetUniformBuffersDynamic = 256, + .maxDescriptorSetStorageBuffers = 256, + .maxDescriptorSetStorageBuffersDynamic = 256, + .maxDescriptorSetSampledImages = 256, + .maxDescriptorSetStorageImages = 256, + .maxDescriptorSetInputAttachments = 256, + .maxVertexInputAttributes = 32, + .maxVertexInputBindings = 32, + .maxVertexInputAttributeOffset = 2047, + .maxVertexInputBindingStride = 2048, + .maxVertexOutputComponents = 128, + .maxTessellationGenerationLevel = 0, + .maxTessellationPatchSize = 0, + .maxTessellationControlPerVertexInputComponents = 0, + .maxTessellationControlPerVertexOutputComponents = 0, + .maxTessellationControlPerPatchOutputComponents = 0, + .maxTessellationControlTotalOutputComponents = 0, + .maxTessellationEvaluationInputComponents = 0, + .maxTessellationEvaluationOutputComponents = 0, + .maxGeometryShaderInvocations = 32, + .maxGeometryInputComponents = 64, + .maxGeometryOutputComponents = 128, + .maxGeometryOutputVertices = 256, + .maxGeometryTotalOutputComponents = 1024, + .maxFragmentInputComponents = 128, + .maxFragmentOutputAttachments = 8, + .maxFragmentDualSrcAttachments = 2, + .maxFragmentCombinedOutputResources = 8, + .maxComputeSharedMemorySize = 32768, + .maxComputeWorkGroupCount = { 65535, 65535, 65535 }, + .maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads, + .maxComputeWorkGroupSize = { + 16 * devinfo->max_cs_threads, + 16 * devinfo->max_cs_threads, + 16 * devinfo->max_cs_threads, + }, + .subPixelPrecisionBits = 4 /* FIXME */, + .subTexelPrecisionBits = 4 /* FIXME */, + .mipmapPrecisionBits = 4 /* FIXME */, + .maxDrawIndexedIndexValue = UINT32_MAX, + .maxDrawIndirectCount = UINT32_MAX, + .maxSamplerLodBias = 16, + .maxSamplerAnisotropy = 16, + .maxViewports = MAX_VIEWPORTS, + .maxViewportDimensions = { (1 << 14), (1 << 14) }, + .viewportBoundsRange = { -16384.0, 16384.0 }, + .viewportSubPixelBits = 13, /* We take a float? 
*/ + .minMemoryMapAlignment = 4096, /* A page */ + .minTexelBufferOffsetAlignment = 1, + .minUniformBufferOffsetAlignment = 1, + .minStorageBufferOffsetAlignment = 1, + .minTexelOffset = -8, + .maxTexelOffset = 7, + .minTexelGatherOffset = -8, + .maxTexelGatherOffset = 7, + .minInterpolationOffset = 0, /* FIXME */ + .maxInterpolationOffset = 0, /* FIXME */ + .subPixelInterpolationOffsetBits = 0, /* FIXME */ + .maxFramebufferWidth = (1 << 14), + .maxFramebufferHeight = (1 << 14), + .maxFramebufferLayers = (1 << 10), + .framebufferColorSampleCounts = sample_counts, + .framebufferDepthSampleCounts = sample_counts, + .framebufferStencilSampleCounts = sample_counts, + .framebufferNoAttachmentsSampleCounts = sample_counts, + .maxColorAttachments = MAX_RTS, + .sampledImageColorSampleCounts = sample_counts, + .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT, + .sampledImageDepthSampleCounts = sample_counts, + .sampledImageStencilSampleCounts = sample_counts, + .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, + .maxSampleMaskWords = 1, + .timestampComputeAndGraphics = false, + .timestampPeriod = time_stamp_base / (1000 * 1000 * 1000), + .maxClipDistances = 0 /* FIXME */, + .maxCullDistances = 0 /* FIXME */, + .maxCombinedClipAndCullDistances = 0 /* FIXME */, + .discreteQueuePriorities = 1, + .pointSizeRange = { 0.125, 255.875 }, + .lineWidthRange = { 0.0, 7.9921875 }, + .pointSizeGranularity = (1.0 / 8.0), + .lineWidthGranularity = (1.0 / 128.0), + .strictLines = false, /* FINISHME */ + .standardSampleLocations = true, + .optimalBufferCopyOffsetAlignment = 128, + .optimalBufferCopyRowPitchAlignment = 128, + .nonCoherentAtomSize = 64, + }; + + *pProperties = (VkPhysicalDeviceProperties) { + .apiVersion = VK_MAKE_VERSION(1, 0, 2), + .driverVersion = 1, + .vendorID = 0x8086, + .deviceID = pdevice->chipset_id, + .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, + .limits = limits, + .sparseProperties = {0}, /* Broadwell doesn't do sparse. */ + }; + + strcpy(pProperties->deviceName, pdevice->name); + anv_device_get_cache_uuid(pProperties->pipelineCacheUUID); +} + +void anv_GetPhysicalDeviceQueueFamilyProperties( + VkPhysicalDevice physicalDevice, + uint32_t* pCount, + VkQueueFamilyProperties* pQueueFamilyProperties) +{ + if (pQueueFamilyProperties == NULL) { + *pCount = 1; + return; + } + + assert(*pCount >= 1); + + *pQueueFamilyProperties = (VkQueueFamilyProperties) { + .queueFlags = VK_QUEUE_GRAPHICS_BIT | + VK_QUEUE_COMPUTE_BIT | + VK_QUEUE_TRANSFER_BIT, + .queueCount = 1, + .timestampValidBits = 36, /* XXX: Real value here */ + .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 }, + }; +} + +void anv_GetPhysicalDeviceMemoryProperties( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryProperties* pMemoryProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + VkDeviceSize heap_size; + + /* Reserve some wiggle room for the driver by exposing only 75% of the + * aperture to the heap. + */ + heap_size = 3 * physical_device->aperture_size / 4; + + if (physical_device->info->has_llc) { + /* Big core GPUs share LLC with the CPU and thus one memory type can be + * both cached and coherent at the same time. 
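+ * (Since this type is HOST_COHERENT, mapped writes need no
+ * vkFlushMappedMemoryRanges() and reads no
+ * vkInvalidateMappedMemoryRanges(); the implementations of those entry
+ * points later in this file simply return early when has_llc is set.)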
+ */ + pMemoryProperties->memoryTypeCount = 1; + pMemoryProperties->memoryTypes[0] = (VkMemoryType) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + .heapIndex = 0, + }; + } else { + /* The spec requires that we expose a host-visible, coherent memory + * type, but Atom GPUs don't share LLC. Thus we offer two memory types + * to give the application a choice between cached, but not coherent and + * coherent but uncached (WC though). + */ + pMemoryProperties->memoryTypeCount = 2; + pMemoryProperties->memoryTypes[0] = (VkMemoryType) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + .heapIndex = 0, + }; + pMemoryProperties->memoryTypes[1] = (VkMemoryType) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + .heapIndex = 0, + }; + } + + pMemoryProperties->memoryHeapCount = 1; + pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) { + .size = heap_size, + .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, + }; +} + +PFN_vkVoidFunction anv_GetInstanceProcAddr( + VkInstance instance, + const char* pName) +{ + return anv_lookup_entrypoint(pName); +} + +/* The loader wants us to expose a second GetInstanceProcAddr function + * to work around certain LD_PRELOAD issues seen in apps. + */ +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( + VkInstance instance, + const char* pName); + +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( + VkInstance instance, + const char* pName) +{ + return anv_GetInstanceProcAddr(instance, pName); +} + +PFN_vkVoidFunction anv_GetDeviceProcAddr( + VkDevice device, + const char* pName) +{ + return anv_lookup_entrypoint(pName); +} + +static VkResult +anv_queue_init(struct anv_device *device, struct anv_queue *queue) +{ + queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + queue->device = device; + queue->pool = &device->surface_state_pool; + + return VK_SUCCESS; +} + +static void +anv_queue_finish(struct anv_queue *queue) +{ +} + +static struct anv_state +anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p) +{ + struct anv_state state; + + state = anv_state_pool_alloc(pool, size, align); + memcpy(state.map, p, size); + + if (!pool->block_pool->device->info.has_llc) + anv_state_clflush(state); + + return state; +} + +struct gen8_border_color { + union { + float float32[4]; + uint32_t uint32[4]; + }; + /* Pad out to 64 bytes */ + uint32_t _pad[12]; +}; + +static void +anv_device_init_border_colors(struct anv_device *device) +{ + static const struct gen8_border_color border_colors[] = { + [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } }, + [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } }, + [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } }, + [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } }, + [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } }, + [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } }, + }; + + device->border_colors = anv_state_pool_emit_data(&device->dynamic_state_pool, + sizeof(border_colors), 64, + border_colors); +} + +VkResult +anv_device_submit_simple_batch(struct anv_device *device, + struct anv_batch *batch) +{ + struct drm_i915_gem_execbuffer2 
execbuf; + struct drm_i915_gem_exec_object2 exec2_objects[1]; + struct anv_bo bo; + VkResult result = VK_SUCCESS; + uint32_t size; + int64_t timeout; + int ret; + + /* Kernel driver requires 8 byte aligned batch length */ + size = align_u32(batch->next - batch->start, 8); + assert(size < device->batch_bo_pool.bo_size); + result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo); + if (result != VK_SUCCESS) + return result; + + memcpy(bo.map, batch->start, size); + if (!device->info.has_llc) + anv_clflush_range(bo.map, size); + + exec2_objects[0].handle = bo.gem_handle; + exec2_objects[0].relocation_count = 0; + exec2_objects[0].relocs_ptr = 0; + exec2_objects[0].alignment = 0; + exec2_objects[0].offset = bo.offset; + exec2_objects[0].flags = 0; + exec2_objects[0].rsvd1 = 0; + exec2_objects[0].rsvd2 = 0; + + execbuf.buffers_ptr = (uintptr_t) exec2_objects; + execbuf.buffer_count = 1; + execbuf.batch_start_offset = 0; + execbuf.batch_len = size; + execbuf.cliprects_ptr = 0; + execbuf.num_cliprects = 0; + execbuf.DR1 = 0; + execbuf.DR4 = 0; + + execbuf.flags = + I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; + execbuf.rsvd1 = device->context_id; + execbuf.rsvd2 = 0; + + ret = anv_gem_execbuffer(device, &execbuf); + if (ret != 0) { + /* We don't know the real error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m"); + goto fail; + } + + timeout = INT64_MAX; + ret = anv_gem_wait(device, bo.gem_handle, &timeout); + if (ret != 0) { + /* We don't know the real error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m"); + goto fail; + } + + fail: + anv_bo_pool_free(&device->batch_bo_pool, &bo); + + return result; +} + +VkResult anv_CreateDevice( + VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDevice* pDevice) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + VkResult result; + struct anv_device *device; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); + + for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { + bool found = false; + for (uint32_t j = 0; j < ARRAY_SIZE(device_extensions); j++) { + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + device_extensions[j].extensionName) == 0) { + found = true; + break; + } + } + if (!found) + return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); + } + + anv_set_dispatch_devinfo(physical_device->info); + + device = anv_alloc2(&physical_device->instance->alloc, pAllocator, + sizeof(*device), 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!device) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; + device->instance = physical_device->instance; + device->chipset_id = physical_device->chipset_id; + + if (pAllocator) + device->alloc = *pAllocator; + else + device->alloc = physical_device->instance->alloc; + + /* XXX(chadv): Can we dup() physicalDevice->fd here? 
*/ + device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC); + if (device->fd == -1) { + result = vk_error(VK_ERROR_INITIALIZATION_FAILED); + goto fail_device; + } + + device->context_id = anv_gem_create_context(device); + if (device->context_id == -1) { + result = vk_error(VK_ERROR_INITIALIZATION_FAILED); + goto fail_fd; + } + + device->info = *physical_device->info; + device->isl_dev = physical_device->isl_dev; + + pthread_mutex_init(&device->mutex, NULL); + + anv_bo_pool_init(&device->batch_bo_pool, device, ANV_CMD_BUFFER_BATCH_SIZE); + + anv_block_pool_init(&device->dynamic_state_block_pool, device, 16384); + + anv_state_pool_init(&device->dynamic_state_pool, + &device->dynamic_state_block_pool); + + anv_block_pool_init(&device->instruction_block_pool, device, 128 * 1024); + anv_pipeline_cache_init(&device->default_pipeline_cache, device); + + anv_block_pool_init(&device->surface_state_block_pool, device, 4096); + + anv_state_pool_init(&device->surface_state_pool, + &device->surface_state_block_pool); + + anv_bo_init_new(&device->workaround_bo, device, 1024); + + anv_block_pool_init(&device->scratch_block_pool, device, 0x10000); + + anv_queue_init(device, &device->queue); + + switch (device->info.gen) { + case 7: + if (!device->info.is_haswell) + result = gen7_init_device_state(device); + else + result = gen75_init_device_state(device); + break; + case 8: + result = gen8_init_device_state(device); + break; + case 9: + result = gen9_init_device_state(device); + break; + default: + /* Shouldn't get here as we don't create physical devices for any other + * gens. */ + unreachable("unhandled gen"); + } + if (result != VK_SUCCESS) + goto fail_fd; + + result = anv_device_init_meta(device); + if (result != VK_SUCCESS) + goto fail_fd; + + anv_device_init_border_colors(device); + + *pDevice = anv_device_to_handle(device); + + return VK_SUCCESS; + + fail_fd: + close(device->fd); + fail_device: + anv_free(&device->alloc, device); + + return result; +} + +void anv_DestroyDevice( + VkDevice _device, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + anv_queue_finish(&device->queue); + + anv_device_finish_meta(device); + +#ifdef HAVE_VALGRIND + /* We only need to free these to prevent valgrind errors. The backing + * BO will go away in a couple of lines so we don't actually leak. 
+ */ + anv_state_pool_free(&device->dynamic_state_pool, device->border_colors); +#endif + + anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size); + anv_gem_close(device, device->workaround_bo.gem_handle); + + anv_bo_pool_finish(&device->batch_bo_pool); + anv_state_pool_finish(&device->dynamic_state_pool); + anv_block_pool_finish(&device->dynamic_state_block_pool); + anv_block_pool_finish(&device->instruction_block_pool); + anv_state_pool_finish(&device->surface_state_pool); + anv_block_pool_finish(&device->surface_state_block_pool); + anv_block_pool_finish(&device->scratch_block_pool); + + close(device->fd); + + pthread_mutex_destroy(&device->mutex); + + anv_free(&device->alloc, device); +} + +VkResult anv_EnumerateInstanceExtensionProperties( + const char* pLayerName, + uint32_t* pPropertyCount, + VkExtensionProperties* pProperties) +{ + if (pProperties == NULL) { + *pPropertyCount = ARRAY_SIZE(global_extensions); + return VK_SUCCESS; + } + + assert(*pPropertyCount >= ARRAY_SIZE(global_extensions)); + + *pPropertyCount = ARRAY_SIZE(global_extensions); + memcpy(pProperties, global_extensions, sizeof(global_extensions)); + + return VK_SUCCESS; +} + +VkResult anv_EnumerateDeviceExtensionProperties( + VkPhysicalDevice physicalDevice, + const char* pLayerName, + uint32_t* pPropertyCount, + VkExtensionProperties* pProperties) +{ + if (pProperties == NULL) { + *pPropertyCount = ARRAY_SIZE(device_extensions); + return VK_SUCCESS; + } + + assert(*pPropertyCount >= ARRAY_SIZE(device_extensions)); + + *pPropertyCount = ARRAY_SIZE(device_extensions); + memcpy(pProperties, device_extensions, sizeof(device_extensions)); + + return VK_SUCCESS; +} + +VkResult anv_EnumerateInstanceLayerProperties( + uint32_t* pPropertyCount, + VkLayerProperties* pProperties) +{ + if (pProperties == NULL) { + *pPropertyCount = 0; + return VK_SUCCESS; + } + + /* None supported at this time */ + return vk_error(VK_ERROR_LAYER_NOT_PRESENT); +} + +VkResult anv_EnumerateDeviceLayerProperties( + VkPhysicalDevice physicalDevice, + uint32_t* pPropertyCount, + VkLayerProperties* pProperties) +{ + if (pProperties == NULL) { + *pPropertyCount = 0; + return VK_SUCCESS; + } + + /* None supported at this time */ + return vk_error(VK_ERROR_LAYER_NOT_PRESENT); +} + +void anv_GetDeviceQueue( + VkDevice _device, + uint32_t queueNodeIndex, + uint32_t queueIndex, + VkQueue* pQueue) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + assert(queueIndex == 0); + + *pQueue = anv_queue_to_handle(&device->queue); +} + +VkResult anv_QueueSubmit( + VkQueue _queue, + uint32_t submitCount, + const VkSubmitInfo* pSubmits, + VkFence _fence) +{ + ANV_FROM_HANDLE(anv_queue, queue, _queue); + ANV_FROM_HANDLE(anv_fence, fence, _fence); + struct anv_device *device = queue->device; + int ret; + + for (uint32_t i = 0; i < submitCount; i++) { + for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, + pSubmits[i].pCommandBuffers[j]); + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + + ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf2.execbuf); + if (ret != 0) { + /* We don't know the real error. */ + return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "execbuf2 failed: %m"); + } + + for (uint32_t k = 0; k < cmd_buffer->execbuf2.bo_count; k++) + cmd_buffer->execbuf2.bos[k]->offset = cmd_buffer->execbuf2.objects[k].offset; + } + } + + if (fence) { + ret = anv_gem_execbuffer(device, &fence->execbuf); + if (ret != 0) { + /* We don't know the real error. 
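+ * execbuf2 reports failures only through errno, so the errno string
+ * (%m) is forwarded and the result is conservatively mapped to
+ * VK_ERROR_OUT_OF_DEVICE_MEMORY.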
*/ + return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "execbuf2 failed: %m"); + } + } + + return VK_SUCCESS; +} + +VkResult anv_QueueWaitIdle( + VkQueue _queue) +{ + ANV_FROM_HANDLE(anv_queue, queue, _queue); + + return ANV_CALL(DeviceWaitIdle)(anv_device_to_handle(queue->device)); +} + +VkResult anv_DeviceWaitIdle( + VkDevice _device) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_batch batch; + + uint32_t cmds[8]; + batch.start = batch.next = cmds; + batch.end = (void *) cmds + sizeof(cmds); + + anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END); + anv_batch_emit(&batch, GEN7_MI_NOOP); + + return anv_device_submit_simple_batch(device, &batch); +} + +VkResult +anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size) +{ + bo->gem_handle = anv_gem_create(device, size); + if (!bo->gem_handle) + return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + + bo->map = NULL; + bo->index = 0; + bo->offset = 0; + bo->size = size; + bo->is_winsys_bo = false; + + return VK_SUCCESS; +} + +VkResult anv_AllocateMemory( + VkDevice _device, + const VkMemoryAllocateInfo* pAllocateInfo, + const VkAllocationCallbacks* pAllocator, + VkDeviceMemory* pMem) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_device_memory *mem; + VkResult result; + + assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); + + if (pAllocateInfo->allocationSize == 0) { + /* Apparently, this is allowed */ + *pMem = VK_NULL_HANDLE; + return VK_SUCCESS; + } + + /* We support exactly one memory heap. */ + assert(pAllocateInfo->memoryTypeIndex == 0 || + (!device->info.has_llc && pAllocateInfo->memoryTypeIndex < 2)); + + /* FINISHME: Fail if allocation request exceeds heap size. */ + + mem = anv_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (mem == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* The kernel is going to give us whole pages anyway */ + uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096); + + result = anv_bo_init_new(&mem->bo, device, alloc_size); + if (result != VK_SUCCESS) + goto fail; + + mem->type_index = pAllocateInfo->memoryTypeIndex; + + *pMem = anv_device_memory_to_handle(mem); + + return VK_SUCCESS; + + fail: + anv_free2(&device->alloc, pAllocator, mem); + + return result; +} + +void anv_FreeMemory( + VkDevice _device, + VkDeviceMemory _mem, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_device_memory, mem, _mem); + + if (mem == NULL) + return; + + if (mem->bo.map) + anv_gem_munmap(mem->bo.map, mem->bo.size); + + if (mem->bo.gem_handle != 0) + anv_gem_close(device, mem->bo.gem_handle); + + anv_free2(&device->alloc, pAllocator, mem); +} + +VkResult anv_MapMemory( + VkDevice _device, + VkDeviceMemory _memory, + VkDeviceSize offset, + VkDeviceSize size, + VkMemoryMapFlags flags, + void** ppData) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_device_memory, mem, _memory); + + if (mem == NULL) { + *ppData = NULL; + return VK_SUCCESS; + } + + if (size == VK_WHOLE_SIZE) + size = mem->bo.size - offset; + + /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only + * takes a VkDeviceMemory pointer, it seems like only one map of the memory + * at a time is valid. We could just mmap up front and return an offset + * pointer here, but that may exhaust virtual memory on 32 bit + * userspace. 
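+ *
+ * The mapping below is page-aligned: for example, a request with
+ * offset = 0x1234 uses map_offset = 0x1000, maps (0x234 + size) bytes
+ * rounded up to a 4096-byte page, and returns map + 0x234 in *ppData.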
*/
+
+ uint32_t gem_flags = 0;
+ if (!device->info.has_llc && mem->type_index == 0)
+ gem_flags |= I915_MMAP_WC;
+
+ /* GEM will fail to map if the offset isn't 4k-aligned. Round down. */
+ uint64_t map_offset = offset & ~4095ull;
+ assert(offset >= map_offset);
+ uint64_t map_size = (offset + size) - map_offset;
+
+ /* Let's map whole pages */
+ map_size = align_u64(map_size, 4096);
+
+ mem->map = anv_gem_mmap(device, mem->bo.gem_handle,
+ map_offset, map_size, gem_flags);
+ mem->map_size = map_size;
+
+ *ppData = mem->map + (offset - map_offset);
+
+ return VK_SUCCESS;
+}
+
+void anv_UnmapMemory(
+ VkDevice _device,
+ VkDeviceMemory _memory)
+{
+ ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
+
+ if (mem == NULL)
+ return;
+
+ anv_gem_munmap(mem->map, mem->map_size);
+}
+
+static void
+clflush_mapped_ranges(struct anv_device *device,
+ uint32_t count,
+ const VkMappedMemoryRange *ranges)
+{
+ for (uint32_t i = 0; i < count; i++) {
+ ANV_FROM_HANDLE(anv_device_memory, mem, ranges[i].memory);
+ void *p = mem->map + (ranges[i].offset & ~CACHELINE_MASK);
+ void *end;
+
+ if (ranges[i].offset + ranges[i].size > mem->map_size)
+ end = mem->map + mem->map_size;
+ else
+ end = mem->map + ranges[i].offset + ranges[i].size;
+
+ while (p < end) {
+ __builtin_ia32_clflush(p);
+ p += CACHELINE_SIZE;
+ }
+ }
+}
+
+VkResult anv_FlushMappedMemoryRanges(
+ VkDevice _device,
+ uint32_t memoryRangeCount,
+ const VkMappedMemoryRange* pMemoryRanges)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+
+ if (device->info.has_llc)
+ return VK_SUCCESS;
+
+ /* Make sure the writes we're flushing have landed. */
+ __builtin_ia32_mfence();
+
+ clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_InvalidateMappedMemoryRanges(
+ VkDevice _device,
+ uint32_t memoryRangeCount,
+ const VkMappedMemoryRange* pMemoryRanges)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+
+ if (device->info.has_llc)
+ return VK_SUCCESS;
+
+ clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
+
+ /* Make sure no reads get moved up above the invalidate. */
+ __builtin_ia32_mfence();
+
+ return VK_SUCCESS;
+}
+
+void anv_GetBufferMemoryRequirements(
+ VkDevice device,
+ VkBuffer _buffer,
+ VkMemoryRequirements* pMemoryRequirements)
+{
+ ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
+
+ /* The Vulkan spec (git aaed022) says:
+ *
+ * memoryTypeBits is a bitfield and contains one bit set for every
+ * supported memory type for the resource. The bit `1<<i` is set if and
+ * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
+ * structure for the physical device is supported.
+ */
+ pMemoryRequirements->memoryTypeBits = 1;
+
+ pMemoryRequirements->size = buffer->size;
+ pMemoryRequirements->alignment = 16;
+}
+
+void anv_GetImageMemoryRequirements(
+ VkDevice device,
+ VkImage _image,
+ VkMemoryRequirements* pMemoryRequirements)
+{
+ ANV_FROM_HANDLE(anv_image, image, _image);
+
+ /* The Vulkan spec (git aaed022) says:
+ *
+ * memoryTypeBits is a bitfield and contains one bit set for every
+ * supported memory type for the resource.
The bit `1<<i` is set if and
+ * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
+ * structure for the physical device is supported.
+ */
+ pMemoryRequirements->memoryTypeBits = 1;
+
+ pMemoryRequirements->size = image->size;
+ pMemoryRequirements->alignment = image->alignment;
+}
+
+void anv_GetImageSparseMemoryRequirements(
+ VkDevice device,
+ VkImage image,
+ uint32_t* pSparseMemoryRequirementCount,
+ VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
+{
+ stub();
+}
+
+void anv_GetDeviceMemoryCommitment(
+ VkDevice device,
+ VkDeviceMemory memory,
+ VkDeviceSize* pCommittedMemoryInBytes)
+{
+ *pCommittedMemoryInBytes = 0;
+}
+
+VkResult anv_BindBufferMemory(
+ VkDevice device,
+ VkBuffer _buffer,
+ VkDeviceMemory _memory,
+ VkDeviceSize memoryOffset)
+{
+ ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
+ ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
+
+ if (mem) {
+ buffer->bo = &mem->bo;
+ buffer->offset = memoryOffset;
+ } else {
+ buffer->bo = NULL;
+ buffer->offset = 0;
+ }
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_BindImageMemory(
+ VkDevice device,
+ VkImage _image,
+ VkDeviceMemory _memory,
+ VkDeviceSize memoryOffset)
+{
+ ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
+ ANV_FROM_HANDLE(anv_image, image, _image);
+
+ if (mem) {
+ image->bo = &mem->bo;
+ image->offset = memoryOffset;
+ } else {
+ image->bo = NULL;
+ image->offset = 0;
+ }
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_QueueBindSparse(
+ VkQueue queue,
+ uint32_t bindInfoCount,
+ const VkBindSparseInfo* pBindInfo,
+ VkFence fence)
+{
+ stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
+}
+
+VkResult anv_CreateFence(
+ VkDevice _device,
+ const VkFenceCreateInfo* pCreateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ VkFence* pFence)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ struct anv_fence *fence;
+ struct anv_batch batch;
+ VkResult result;
+
+ const uint32_t fence_size = 128;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
+
+ fence = anv_alloc2(&device->alloc, pAllocator, sizeof(*fence), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (fence == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ result = anv_bo_init_new(&fence->bo, device, fence_size);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ fence->bo.map =
+ anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size, 0);
+ batch.next = batch.start = fence->bo.map;
+ batch.end = fence->bo.map + fence->bo.size;
+ anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END);
+ anv_batch_emit(&batch, GEN7_MI_NOOP);
+
+ if (!device->info.has_llc) {
+ assert(((uintptr_t) fence->bo.map & CACHELINE_MASK) == 0);
+ assert(batch.next - fence->bo.map <= CACHELINE_SIZE);
+ __builtin_ia32_mfence();
+ __builtin_ia32_clflush(fence->bo.map);
+ }
+
+ fence->exec2_objects[0].handle = fence->bo.gem_handle;
+ fence->exec2_objects[0].relocation_count = 0;
+ fence->exec2_objects[0].relocs_ptr = 0;
+ fence->exec2_objects[0].alignment = 0;
+ fence->exec2_objects[0].offset = fence->bo.offset;
+ fence->exec2_objects[0].flags = 0;
+ fence->exec2_objects[0].rsvd1 = 0;
+ fence->exec2_objects[0].rsvd2 = 0;
+
+ fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects;
+ fence->execbuf.buffer_count = 1;
+ fence->execbuf.batch_start_offset = 0;
+ fence->execbuf.batch_len = batch.next - fence->bo.map;
+ fence->execbuf.cliprects_ptr = 0;
+ fence->execbuf.num_cliprects = 0;
+ fence->execbuf.DR1 = 0;
+ fence->execbuf.DR4 = 0;
+
+ fence->execbuf.flags =
+ I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
+ fence->execbuf.rsvd1 = device->context_id;
+ fence->execbuf.rsvd2 = 0;
+
+ fence->ready = false;
+
+ *pFence = anv_fence_to_handle(fence);
+
+ return VK_SUCCESS;
+
+ fail:
+
anv_free2(&device->alloc, pAllocator, fence);
+
+ return result;
+}
+
+void anv_DestroyFence(
+ VkDevice _device,
+ VkFence _fence,
+ const VkAllocationCallbacks* pAllocator)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_fence, fence, _fence);
+
+ anv_gem_munmap(fence->bo.map, fence->bo.size);
+ anv_gem_close(device, fence->bo.gem_handle);
+ anv_free2(&device->alloc, pAllocator, fence);
+}
+
+VkResult anv_ResetFences(
+ VkDevice _device,
+ uint32_t fenceCount,
+ const VkFence* pFences)
+{
+ for (uint32_t i = 0; i < fenceCount; i++) {
+ ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
+ fence->ready = false;
+ }
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_GetFenceStatus(
+ VkDevice _device,
+ VkFence _fence)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_fence, fence, _fence);
+ int64_t t = 0;
+ int ret;
+
+ if (fence->ready)
+ return VK_SUCCESS;
+
+ ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
+ if (ret == 0) {
+ fence->ready = true;
+ return VK_SUCCESS;
+ }
+
+ return VK_NOT_READY;
+}
+
+VkResult anv_WaitForFences(
+ VkDevice _device,
+ uint32_t fenceCount,
+ const VkFence* pFences,
+ VkBool32 waitAll,
+ uint64_t timeout)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+
+ /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed
+ * to block indefinitely for timeouts <= 0. Unfortunately, this was broken
+ * for a couple of kernel releases. Since there's no way to know
+ * whether or not the kernel we're using is one of the broken ones, the
+ * best we can do is to clamp the timeout to INT64_MAX. This limits the
+ * maximum timeout from 584 years to 292 years - likely not a big deal.
+ */
+ if (timeout > INT64_MAX)
+ timeout = INT64_MAX;
+
+ int64_t t = timeout;
+
+ /* FIXME: handle !waitAll */
+
+ for (uint32_t i = 0; i < fenceCount; i++) {
+ ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
+ int ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
+ if (ret == -1 && errno == ETIME) {
+ return VK_TIMEOUT;
+ } else if (ret == -1) {
+ /* We don't know the real error. */
+ return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
+ "gem wait failed: %m");
+ }
+ }
+
+ return VK_SUCCESS;
+}
+
+// Queue semaphore functions
+
+VkResult anv_CreateSemaphore(
+ VkDevice device,
+ const VkSemaphoreCreateInfo* pCreateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ VkSemaphore* pSemaphore)
+{
+ /* The DRM execbuffer ioctl always executes in order, even between different
+ * rings. As such, there's nothing to do for the user space semaphore.
+ */
+
+ *pSemaphore = (VkSemaphore)1;
+
+ return VK_SUCCESS;
+}
+
+void anv_DestroySemaphore(
+ VkDevice device,
+ VkSemaphore semaphore,
+ const VkAllocationCallbacks* pAllocator)
+{
+}
+
+// Event functions
+
+VkResult anv_CreateEvent(
+ VkDevice _device,
+ const VkEventCreateInfo* pCreateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ VkEvent* pEvent)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ struct anv_state state;
+ struct anv_event *event;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);
+
+ state = anv_state_pool_alloc(&device->dynamic_state_pool,
+ sizeof(*event), 8);
+ event = state.map;
+ event->state = state;
+ event->semaphore = VK_EVENT_RESET;
+
+ if (!device->info.has_llc) {
+ /* Make sure the writes we're flushing have landed.
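+ * (The mfence orders the store to event->semaphore above ahead of the
+ * clflush that pushes the cache line out to memory for the GPU to
+ * observe.)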
*/ + __builtin_ia32_mfence(); + __builtin_ia32_clflush(event); + } + + *pEvent = anv_event_to_handle(event); + + return VK_SUCCESS; +} + +void anv_DestroyEvent( + VkDevice _device, + VkEvent _event, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_event, event, _event); + + anv_state_pool_free(&device->dynamic_state_pool, event->state); +} + +VkResult anv_GetEventStatus( + VkDevice _device, + VkEvent _event) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_event, event, _event); + + if (!device->info.has_llc) { + /* Invalidate read cache before reading event written by GPU. */ + __builtin_ia32_clflush(event); + __builtin_ia32_mfence(); + + } + + return event->semaphore; +} + +VkResult anv_SetEvent( + VkDevice _device, + VkEvent _event) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_event, event, _event); + + event->semaphore = VK_EVENT_SET; + + if (!device->info.has_llc) { + /* Make sure the writes we're flushing have landed. */ + __builtin_ia32_mfence(); + __builtin_ia32_clflush(event); + } + + return VK_SUCCESS; +} + +VkResult anv_ResetEvent( + VkDevice _device, + VkEvent _event) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_event, event, _event); + + event->semaphore = VK_EVENT_RESET; + + if (!device->info.has_llc) { + /* Make sure the writes we're flushing have landed. */ + __builtin_ia32_mfence(); + __builtin_ia32_clflush(event); + } + + return VK_SUCCESS; +} + +// Buffer functions + +VkResult anv_CreateBuffer( + VkDevice _device, + const VkBufferCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkBuffer* pBuffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_buffer *buffer; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO); + + buffer = anv_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (buffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + buffer->size = pCreateInfo->size; + buffer->usage = pCreateInfo->usage; + buffer->bo = NULL; + buffer->offset = 0; + + *pBuffer = anv_buffer_to_handle(buffer); + + return VK_SUCCESS; +} + +void anv_DestroyBuffer( + VkDevice _device, + VkBuffer _buffer, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + anv_free2(&device->alloc, pAllocator, buffer); +} + +void +anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state, + enum isl_format format, + uint32_t offset, uint32_t range, uint32_t stride) +{ + switch (device->info.gen) { + case 7: + if (device->info.is_haswell) + gen75_fill_buffer_surface_state(state.map, format, offset, range, + stride); + else + gen7_fill_buffer_surface_state(state.map, format, offset, range, + stride); + break; + case 8: + gen8_fill_buffer_surface_state(state.map, format, offset, range, stride); + break; + case 9: + gen9_fill_buffer_surface_state(state.map, format, offset, range, stride); + break; + default: + unreachable("unsupported gen\n"); + } + + if (!device->info.has_llc) + anv_state_clflush(state); +} + +void anv_DestroySampler( + VkDevice _device, + VkSampler _sampler, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_sampler, sampler, _sampler); + + anv_free2(&device->alloc, pAllocator, sampler); +} + +VkResult anv_CreateFramebuffer( + VkDevice _device, + const 
VkFramebufferCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkFramebuffer* pFramebuffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_framebuffer *framebuffer; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO); + + size_t size = sizeof(*framebuffer) + + sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount; + framebuffer = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (framebuffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + framebuffer->attachment_count = pCreateInfo->attachmentCount; + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { + VkImageView _iview = pCreateInfo->pAttachments[i]; + framebuffer->attachments[i] = anv_image_view_from_handle(_iview); + } + + framebuffer->width = pCreateInfo->width; + framebuffer->height = pCreateInfo->height; + framebuffer->layers = pCreateInfo->layers; + + *pFramebuffer = anv_framebuffer_to_handle(framebuffer); + + return VK_SUCCESS; +} + +void anv_DestroyFramebuffer( + VkDevice _device, + VkFramebuffer _fb, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_framebuffer, fb, _fb); + + anv_free2(&device->alloc, pAllocator, fb); +} + +void vkCmdDbgMarkerBegin( + VkCommandBuffer commandBuffer, + const char* pMarker) + __attribute__ ((visibility ("default"))); + +void vkCmdDbgMarkerEnd( + VkCommandBuffer commandBuffer) + __attribute__ ((visibility ("default"))); + +void vkCmdDbgMarkerBegin( + VkCommandBuffer commandBuffer, + const char* pMarker) +{ +} + +void vkCmdDbgMarkerEnd( + VkCommandBuffer commandBuffer) +{ +} diff --git a/src/intel/vulkan/anv_dump.c b/src/intel/vulkan/anv_dump.c new file mode 100644 index 0000000..b7fa28b --- /dev/null +++ b/src/intel/vulkan/anv_dump.c @@ -0,0 +1,209 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" + +/* This file contains utility functions for help debugging. They can be + * called from GDB or similar to help inspect images and buffers. 
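+ * For example, from a debugger one might dump miplevel 0, layer 0 of an
+ * image to a PPM file with something like (the path is illustrative):
+ *
+ *   (gdb) call anv_dump_image_to_ppm(device, image, 0, 0, "/tmp/image.ppm")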
+ */ + +void +anv_dump_image_to_ppm(struct anv_device *device, + struct anv_image *image, unsigned miplevel, + unsigned array_layer, const char *filename) +{ + VkDevice vk_device = anv_device_to_handle(device); + VkResult result; + + VkExtent2D extent = { image->extent.width, image->extent.height }; + for (unsigned i = 0; i < miplevel; i++) { + extent.width = MAX2(1, extent.width / 2); + extent.height = MAX2(1, extent.height / 2); + } + + VkImage copy_image; + result = anv_CreateImage(vk_device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = VK_FORMAT_R8G8B8A8_UNORM, + .extent = (VkExtent3D) { extent.width, extent.height, 1 }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT, + .flags = 0, + }, NULL, ©_image); + assert(result == VK_SUCCESS); + + VkMemoryRequirements reqs; + anv_GetImageMemoryRequirements(vk_device, copy_image, &reqs); + + VkDeviceMemory memory; + result = anv_AllocateMemory(vk_device, + &(VkMemoryAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = reqs.size, + .memoryTypeIndex = 0, + }, NULL, &memory); + assert(result == VK_SUCCESS); + + result = anv_BindImageMemory(vk_device, copy_image, memory, 0); + assert(result == VK_SUCCESS); + + VkCommandPool commandPool; + result = anv_CreateCommandPool(vk_device, + &(VkCommandPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .queueFamilyIndex = 0, + .flags = 0, + }, NULL, &commandPool); + assert(result == VK_SUCCESS); + + VkCommandBuffer cmd; + result = anv_AllocateCommandBuffers(vk_device, + &(VkCommandBufferAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .commandPool = commandPool, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = 1, + }, &cmd); + assert(result == VK_SUCCESS); + + result = anv_BeginCommandBuffer(cmd, + &(VkCommandBufferBeginInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + }); + assert(result == VK_SUCCESS); + + anv_CmdBlitImage(cmd, + anv_image_to_handle(image), VK_IMAGE_LAYOUT_GENERAL, + copy_image, VK_IMAGE_LAYOUT_GENERAL, 1, + &(VkImageBlit) { + .srcSubresource = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = miplevel, + .baseArrayLayer = array_layer, + .layerCount = 1, + }, + .srcOffsets = { + { 0, 0, 0 }, + { extent.width, extent.height, 1 }, + }, + .dstSubresource = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .dstOffsets = { + { 0, 0, 0 }, + { extent.width, extent.height, 1 }, + }, + }, VK_FILTER_NEAREST); + + ANV_CALL(CmdPipelineBarrier)(cmd, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + true, 0, NULL, 0, NULL, 1, + &(VkImageMemoryBarrier) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_HOST_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = 0, + .dstQueueFamilyIndex = 0, + .image = copy_image, + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }); + + result = anv_EndCommandBuffer(cmd); + assert(result == VK_SUCCESS); + + VkFence fence; + result = anv_CreateFence(vk_device, + &(VkFenceCreateInfo) { + 
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .flags = 0, + }, NULL, &fence); + assert(result == VK_SUCCESS); + + result = anv_QueueSubmit(anv_queue_to_handle(&device->queue), 1, + &(VkSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &cmd, + }, fence); + assert(result == VK_SUCCESS); + + result = anv_WaitForFences(vk_device, 1, &fence, true, UINT64_MAX); + assert(result == VK_SUCCESS); + + anv_DestroyFence(vk_device, fence, NULL); + anv_DestroyCommandPool(vk_device, commandPool, NULL); + + uint8_t *map; + result = anv_MapMemory(vk_device, memory, 0, reqs.size, 0, (void **)&map); + assert(result == VK_SUCCESS); + + VkSubresourceLayout layout; + anv_GetImageSubresourceLayout(vk_device, copy_image, + &(VkImageSubresource) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = 0, + .arrayLayer = 0, + }, &layout); + + map += layout.offset; + + /* Now we can finally write the PPM file */ + FILE *file = fopen(filename, "wb"); + assert(file); + + fprintf(file, "P6\n%d %d\n255\n", extent.width, extent.height); + for (unsigned y = 0; y < extent.height; y++) { + uint8_t row[extent.width * 3]; + for (unsigned x = 0; x < extent.width; x++) { + row[x * 3 + 0] = map[x * 4 + 0]; + row[x * 3 + 1] = map[x * 4 + 1]; + row[x * 3 + 2] = map[x * 4 + 2]; + } + fwrite(row, 3, extent.width, file); + + map += layout.rowPitch; + } + fclose(file); + + anv_UnmapMemory(vk_device, memory); + anv_DestroyImage(vk_device, copy_image, NULL); + anv_FreeMemory(vk_device, memory, NULL); +} diff --git a/src/intel/vulkan/anv_entrypoints_gen.py b/src/intel/vulkan/anv_entrypoints_gen.py new file mode 100644 index 0000000..1e4cfcb --- /dev/null +++ b/src/intel/vulkan/anv_entrypoints_gen.py @@ -0,0 +1,324 @@ +# coding=utf-8 +# +# Copyright © 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# + +import fileinput, re, sys + +# Each function typedef in the vulkan.h header is all on one line and matches +# this regepx. We hope that won't change. + +p = re.compile('typedef ([^ ]*) *\((?:VKAPI_PTR)? *\*PFN_vk([^(]*)\)(.*);') + +entrypoints = [] + +# We generate a static hash table for entry point lookup +# (vkGetProcAddress). We use a linear congruential generator for our hash +# function and a power-of-two size table. The prime numbers are determined +# experimentally. 
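+# Concretely, a name is hashed as h = (h * prime_factor + ord(c)) & u32_mask
+# over its characters, and the low bits (h & hash_mask) pick one of the 256
+# slots in the table below.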
+ +none = 0xffff +hash_size = 256 +u32_mask = 2**32 - 1 +hash_mask = hash_size - 1 + +prime_factor = 5024183 +prime_step = 19 + +def hash(name): + h = 0; + for c in name: + h = (h * prime_factor + ord(c)) & u32_mask + + return h + +opt_header = False +opt_code = False + +if (sys.argv[1] == "header"): + opt_header = True + sys.argv.pop() +elif (sys.argv[1] == "code"): + opt_code = True + sys.argv.pop() + +# Parse the entry points in the header + +i = 0 +for line in fileinput.input(): + m = p.match(line) + if (m): + if m.group(2) == 'VoidFunction': + continue + fullname = "vk" + m.group(2) + h = hash(fullname) + entrypoints.append((m.group(1), m.group(2), m.group(3), i, h)) + i = i + 1 + +# For outputting entrypoints.h we generate a anv_EntryPoint() prototype +# per entry point. + +if opt_header: + print "/* This file generated from vk_gen.py, don't edit directly. */\n" + + print "struct anv_dispatch_table {" + print " union {" + print " void *entrypoints[%d];" % len(entrypoints) + print " struct {" + + for type, name, args, num, h in entrypoints: + print " %s (*%s)%s;" % (type, name, args) + print " };\n" + print " };\n" + print "};\n" + + print "void anv_set_dispatch_devinfo(const struct brw_device_info *info);\n" + + for type, name, args, num, h in entrypoints: + print "%s anv_%s%s;" % (type, name, args) + print "%s gen7_%s%s;" % (type, name, args) + print "%s gen75_%s%s;" % (type, name, args) + print "%s gen8_%s%s;" % (type, name, args) + print "%s gen9_%s%s;" % (type, name, args) + print "%s anv_validate_%s%s;" % (type, name, args) + exit() + + + +print """/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* DO NOT EDIT! This is a generated file. */ + +#include "anv_private.h" + +struct anv_entrypoint { + uint32_t name; + uint32_t hash; +}; + +/* We use a big string constant to avoid lots of reloctions from the entry + * point table to lots of little strings. The entries in the entry point table + * store the index into this big string. + */ + +static const char strings[] =""" + +offsets = [] +i = 0; +for type, name, args, num, h in entrypoints: + print " \"vk%s\\0\"" % name + offsets.append(i) + i += 2 + len(name) + 1 +print """ ; + +/* Weak aliases for all potential validate functions. These will resolve to + * NULL if they're not defined, which lets the resolve_entrypoint() function + * either pick a validate wrapper if available or just plug in the actual + * entry point. 
+ */ +""" + +# Now generate the table of all entry points and their validation functions + +print "\nstatic const struct anv_entrypoint entrypoints[] = {" +for type, name, args, num, h in entrypoints: + print " { %5d, 0x%08x }," % (offsets[num], h) +print "};\n" + +for layer in [ "anv", "validate", "gen7", "gen75", "gen8", "gen9" ]: + for type, name, args, num, h in entrypoints: + print "%s %s_%s%s __attribute__ ((weak));" % (type, layer, name, args) + print "\nconst struct anv_dispatch_table %s_layer = {" % layer + for type, name, args, num, h in entrypoints: + print " .%s = %s_%s," % (name, layer, name) + print "};\n" + +print """ +#ifdef DEBUG +static bool enable_validate = true; +#else +static bool enable_validate = false; +#endif + +/* We can't use symbols that need resolving (like, oh, getenv) in the resolve + * function. This means that we have to determine whether or not to use the + * validation layer sometime before that. The constructor function attribute asks + * the dynamic linker to invoke determine_validate() at dlopen() time which + * works. + */ +static void __attribute__ ((constructor)) +determine_validate(void) +{ + const char *s = getenv("ANV_VALIDATE"); + + if (s) + enable_validate = atoi(s); +} + +static const struct brw_device_info *dispatch_devinfo; + +void +anv_set_dispatch_devinfo(const struct brw_device_info *devinfo) +{ + dispatch_devinfo = devinfo; +} + +void * __attribute__ ((noinline)) +anv_resolve_entrypoint(uint32_t index) +{ + if (enable_validate && validate_layer.entrypoints[index]) + return validate_layer.entrypoints[index]; + + if (dispatch_devinfo == NULL) { + assert(anv_layer.entrypoints[index]); + return anv_layer.entrypoints[index]; + } + + switch (dispatch_devinfo->gen) { + case 9: + if (gen9_layer.entrypoints[index]) + return gen9_layer.entrypoints[index]; + /* fall through */ + case 8: + if (gen8_layer.entrypoints[index]) + return gen8_layer.entrypoints[index]; + /* fall through */ + case 7: + if (dispatch_devinfo->is_haswell && gen75_layer.entrypoints[index]) + return gen75_layer.entrypoints[index]; + + if (gen7_layer.entrypoints[index]) + return gen7_layer.entrypoints[index]; + /* fall through */ + case 0: + return anv_layer.entrypoints[index]; + default: + unreachable("unsupported gen\\n"); + } +} +""" + +# Now output ifuncs and their resolve helpers for all entry points. The +# resolve helper calls resolve_entrypoint() with the entry point index, which +# lets the resolver look it up in the table. + +for type, name, args, num, h in entrypoints: + print "static void *resolve_%s(void) { return anv_resolve_entrypoint(%d); }" % (name, num) + print "%s vk%s%s\n __attribute__ ((ifunc (\"resolve_%s\"), visibility (\"default\")));\n" % (type, name, args, name) + + +# Now generate the hash table used for entry point look up. This is a +# uint16_t table of entry point indices. We use 0xffff to indicate an entry +# in the hash table is empty. 
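+# Collisions are handled with open addressing: when slot (h & hash_mask) is
+# already taken we retry at h + prime_step until a free slot turns up.
+# anv_lookup_entrypoint() in the generated C code walks the same probe
+# sequence, so both sides must agree on prime_step and hash_mask.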
+ +map = [none for f in xrange(hash_size)] +collisions = [0 for f in xrange(10)] +for type, name, args, num, h in entrypoints: + level = 0 + while map[h & hash_mask] != none: + h = h + prime_step + level = level + 1 + if level > 9: + collisions[9] += 1 + else: + collisions[level] += 1 + map[h & hash_mask] = num + +print "/* Hash table stats:" +print " * size %d entries" % hash_size +print " * collisions entries" +for i in xrange(10): + if (i == 9): + plus = "+" + else: + plus = " " + + print " * %2d%s %4d" % (i, plus, collisions[i]) +print " */\n" + +print "#define none 0x%04x\n" % none + +print "static const uint16_t map[] = {" +for i in xrange(0, hash_size, 8): + print " ", + for j in xrange(i, i + 8): + if map[j] & 0xffff == 0xffff: + print " none,", + else: + print "0x%04x," % (map[j] & 0xffff), + print + +print "};" + +# Finally we generate the hash table lookup function. The hash function and +# linear probing algorithm matches the hash table generated above. + +print """ +void * +anv_lookup_entrypoint(const char *name) +{ + static const uint32_t prime_factor = %d; + static const uint32_t prime_step = %d; + const struct anv_entrypoint *e; + uint32_t hash, h, i; + const char *p; + + hash = 0; + for (p = name; *p; p++) + hash = hash * prime_factor + *p; + + h = hash; + do { + i = map[h & %d]; + if (i == none) + return NULL; + e = &entrypoints[i]; + h += prime_step; + } while (e->hash != hash); + + if (strcmp(name, strings + e->name) != 0) + return NULL; + + return anv_resolve_entrypoint(i); +} +""" % (prime_factor, prime_step, hash_mask) diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c new file mode 100644 index 0000000..7798a7b --- /dev/null +++ b/src/intel/vulkan/anv_formats.c @@ -0,0 +1,603 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" +#include "brw_surface_formats.h" + +#define RGBA ((struct anv_format_swizzle) { 0, 1, 2, 3 }) +#define BGRA ((struct anv_format_swizzle) { 2, 1, 0, 3 }) + +#define swiz_fmt(__vk_fmt, __hw_fmt, __swizzle, ...) \ + [__vk_fmt] = { \ + .vk_format = __vk_fmt, \ + .name = #__vk_fmt, \ + .isl_format = __hw_fmt, \ + .isl_layout = &isl_format_layouts[__hw_fmt], \ + .swizzle = __swizzle, \ + __VA_ARGS__ \ + } + +#define fmt(__vk_fmt, __hw_fmt, ...) \ + swiz_fmt(__vk_fmt, __hw_fmt, RGBA, __VA_ARGS__) + +/* HINT: For array formats, the ISL name should match the VK name. 
For + * packed formats, they should have the channels in reverse order from each + * other. The reason for this is that, for packed formats, the ISL (and + * bspec) names are in LSB -> MSB order while VK formats are MSB -> LSB. + */ +static const struct anv_format anv_formats[] = { + fmt(VK_FORMAT_UNDEFINED, ISL_FORMAT_RAW), + fmt(VK_FORMAT_R4G4_UNORM_PACK8, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R4G4B4A4_UNORM_PACK16, ISL_FORMAT_A4B4G4R4_UNORM), + swiz_fmt(VK_FORMAT_B4G4R4A4_UNORM_PACK16, ISL_FORMAT_A4B4G4R4_UNORM, BGRA), + fmt(VK_FORMAT_R5G6B5_UNORM_PACK16, ISL_FORMAT_B5G6R5_UNORM), + swiz_fmt(VK_FORMAT_B5G6R5_UNORM_PACK16, ISL_FORMAT_B5G6R5_UNORM, BGRA), + fmt(VK_FORMAT_R5G5B5A1_UNORM_PACK16, ISL_FORMAT_A1B5G5R5_UNORM), + fmt(VK_FORMAT_B5G5R5A1_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_A1R5G5B5_UNORM_PACK16, ISL_FORMAT_B5G5R5A1_UNORM), + fmt(VK_FORMAT_R8_UNORM, ISL_FORMAT_R8_UNORM), + fmt(VK_FORMAT_R8_SNORM, ISL_FORMAT_R8_SNORM), + fmt(VK_FORMAT_R8_USCALED, ISL_FORMAT_R8_USCALED), + fmt(VK_FORMAT_R8_SSCALED, ISL_FORMAT_R8_SSCALED), + fmt(VK_FORMAT_R8_UINT, ISL_FORMAT_R8_UINT), + fmt(VK_FORMAT_R8_SINT, ISL_FORMAT_R8_SINT), + fmt(VK_FORMAT_R8_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_R8G8_UNORM, ISL_FORMAT_R8G8_UNORM), + fmt(VK_FORMAT_R8G8_SNORM, ISL_FORMAT_R8G8_SNORM), + fmt(VK_FORMAT_R8G8_USCALED, ISL_FORMAT_R8G8_USCALED), + fmt(VK_FORMAT_R8G8_SSCALED, ISL_FORMAT_R8G8_SSCALED), + fmt(VK_FORMAT_R8G8_UINT, ISL_FORMAT_R8G8_UINT), + fmt(VK_FORMAT_R8G8_SINT, ISL_FORMAT_R8G8_SINT), + fmt(VK_FORMAT_R8G8_SRGB, ISL_FORMAT_UNSUPPORTED), /* L8A8_UNORM_SRGB */ + fmt(VK_FORMAT_R8G8B8_UNORM, ISL_FORMAT_R8G8B8_UNORM), + fmt(VK_FORMAT_R8G8B8_SNORM, ISL_FORMAT_R8G8B8_SNORM), + fmt(VK_FORMAT_R8G8B8_USCALED, ISL_FORMAT_R8G8B8_USCALED), + fmt(VK_FORMAT_R8G8B8_SSCALED, ISL_FORMAT_R8G8B8_SSCALED), + fmt(VK_FORMAT_R8G8B8_UINT, ISL_FORMAT_R8G8B8_UINT), + fmt(VK_FORMAT_R8G8B8_SINT, ISL_FORMAT_R8G8B8_SINT), + fmt(VK_FORMAT_R8G8B8_SRGB, ISL_FORMAT_UNSUPPORTED), /* B8G8R8A8_UNORM_SRGB */ + fmt(VK_FORMAT_R8G8B8A8_UNORM, ISL_FORMAT_R8G8B8A8_UNORM), + fmt(VK_FORMAT_R8G8B8A8_SNORM, ISL_FORMAT_R8G8B8A8_SNORM), + fmt(VK_FORMAT_R8G8B8A8_USCALED, ISL_FORMAT_R8G8B8A8_USCALED), + fmt(VK_FORMAT_R8G8B8A8_SSCALED, ISL_FORMAT_R8G8B8A8_SSCALED), + fmt(VK_FORMAT_R8G8B8A8_UINT, ISL_FORMAT_R8G8B8A8_UINT), + fmt(VK_FORMAT_R8G8B8A8_SINT, ISL_FORMAT_R8G8B8A8_SINT), + fmt(VK_FORMAT_R8G8B8A8_SRGB, ISL_FORMAT_R8G8B8A8_UNORM_SRGB), + fmt(VK_FORMAT_A8B8G8R8_UNORM_PACK32, ISL_FORMAT_R8G8B8A8_UNORM), + fmt(VK_FORMAT_A8B8G8R8_SNORM_PACK32, ISL_FORMAT_R8G8B8A8_SNORM), + fmt(VK_FORMAT_A8B8G8R8_USCALED_PACK32, ISL_FORMAT_R8G8B8A8_USCALED), + fmt(VK_FORMAT_A8B8G8R8_SSCALED_PACK32, ISL_FORMAT_R8G8B8A8_SSCALED), + fmt(VK_FORMAT_A8B8G8R8_UINT_PACK32, ISL_FORMAT_R8G8B8A8_UINT), + fmt(VK_FORMAT_A8B8G8R8_SINT_PACK32, ISL_FORMAT_R8G8B8A8_SINT), + fmt(VK_FORMAT_A8B8G8R8_SRGB_PACK32, ISL_FORMAT_R8G8B8A8_UNORM_SRGB), + fmt(VK_FORMAT_A2R10G10B10_UNORM_PACK32, ISL_FORMAT_B10G10R10A2_UNORM), + fmt(VK_FORMAT_A2R10G10B10_SNORM_PACK32, ISL_FORMAT_B10G10R10A2_SNORM), + fmt(VK_FORMAT_A2R10G10B10_USCALED_PACK32, ISL_FORMAT_B10G10R10A2_USCALED), + fmt(VK_FORMAT_A2R10G10B10_SSCALED_PACK32, ISL_FORMAT_B10G10R10A2_SSCALED), + fmt(VK_FORMAT_A2R10G10B10_UINT_PACK32, ISL_FORMAT_B10G10R10A2_UINT), + fmt(VK_FORMAT_A2R10G10B10_SINT_PACK32, ISL_FORMAT_B10G10R10A2_SINT), + fmt(VK_FORMAT_A2B10G10R10_UNORM_PACK32, ISL_FORMAT_R10G10B10A2_UNORM), + fmt(VK_FORMAT_A2B10G10R10_SNORM_PACK32, ISL_FORMAT_R10G10B10A2_SNORM), + fmt(VK_FORMAT_A2B10G10R10_USCALED_PACK32, 
ISL_FORMAT_R10G10B10A2_USCALED), + fmt(VK_FORMAT_A2B10G10R10_SSCALED_PACK32, ISL_FORMAT_R10G10B10A2_SSCALED), + fmt(VK_FORMAT_A2B10G10R10_UINT_PACK32, ISL_FORMAT_R10G10B10A2_UINT), + fmt(VK_FORMAT_A2B10G10R10_SINT_PACK32, ISL_FORMAT_R10G10B10A2_SINT), + fmt(VK_FORMAT_R16_UNORM, ISL_FORMAT_R16_UNORM), + fmt(VK_FORMAT_R16_SNORM, ISL_FORMAT_R16_SNORM), + fmt(VK_FORMAT_R16_USCALED, ISL_FORMAT_R16_USCALED), + fmt(VK_FORMAT_R16_SSCALED, ISL_FORMAT_R16_SSCALED), + fmt(VK_FORMAT_R16_UINT, ISL_FORMAT_R16_UINT), + fmt(VK_FORMAT_R16_SINT, ISL_FORMAT_R16_SINT), + fmt(VK_FORMAT_R16_SFLOAT, ISL_FORMAT_R16_FLOAT), + fmt(VK_FORMAT_R16G16_UNORM, ISL_FORMAT_R16G16_UNORM), + fmt(VK_FORMAT_R16G16_SNORM, ISL_FORMAT_R16G16_SNORM), + fmt(VK_FORMAT_R16G16_USCALED, ISL_FORMAT_R16G16_USCALED), + fmt(VK_FORMAT_R16G16_SSCALED, ISL_FORMAT_R16G16_SSCALED), + fmt(VK_FORMAT_R16G16_UINT, ISL_FORMAT_R16G16_UINT), + fmt(VK_FORMAT_R16G16_SINT, ISL_FORMAT_R16G16_SINT), + fmt(VK_FORMAT_R16G16_SFLOAT, ISL_FORMAT_R16G16_FLOAT), + fmt(VK_FORMAT_R16G16B16_UNORM, ISL_FORMAT_R16G16B16_UNORM), + fmt(VK_FORMAT_R16G16B16_SNORM, ISL_FORMAT_R16G16B16_SNORM), + fmt(VK_FORMAT_R16G16B16_USCALED, ISL_FORMAT_R16G16B16_USCALED), + fmt(VK_FORMAT_R16G16B16_SSCALED, ISL_FORMAT_R16G16B16_SSCALED), + fmt(VK_FORMAT_R16G16B16_UINT, ISL_FORMAT_R16G16B16_UINT), + fmt(VK_FORMAT_R16G16B16_SINT, ISL_FORMAT_R16G16B16_SINT), + fmt(VK_FORMAT_R16G16B16_SFLOAT, ISL_FORMAT_R16G16B16_FLOAT), + fmt(VK_FORMAT_R16G16B16A16_UNORM, ISL_FORMAT_R16G16B16A16_UNORM), + fmt(VK_FORMAT_R16G16B16A16_SNORM, ISL_FORMAT_R16G16B16A16_SNORM), + fmt(VK_FORMAT_R16G16B16A16_USCALED, ISL_FORMAT_R16G16B16A16_USCALED), + fmt(VK_FORMAT_R16G16B16A16_SSCALED, ISL_FORMAT_R16G16B16A16_SSCALED), + fmt(VK_FORMAT_R16G16B16A16_UINT, ISL_FORMAT_R16G16B16A16_UINT), + fmt(VK_FORMAT_R16G16B16A16_SINT, ISL_FORMAT_R16G16B16A16_SINT), + fmt(VK_FORMAT_R16G16B16A16_SFLOAT, ISL_FORMAT_R16G16B16A16_FLOAT), + fmt(VK_FORMAT_R32_UINT, ISL_FORMAT_R32_UINT,), + fmt(VK_FORMAT_R32_SINT, ISL_FORMAT_R32_SINT,), + fmt(VK_FORMAT_R32_SFLOAT, ISL_FORMAT_R32_FLOAT,), + fmt(VK_FORMAT_R32G32_UINT, ISL_FORMAT_R32G32_UINT,), + fmt(VK_FORMAT_R32G32_SINT, ISL_FORMAT_R32G32_SINT,), + fmt(VK_FORMAT_R32G32_SFLOAT, ISL_FORMAT_R32G32_FLOAT,), + fmt(VK_FORMAT_R32G32B32_UINT, ISL_FORMAT_R32G32B32_UINT,), + fmt(VK_FORMAT_R32G32B32_SINT, ISL_FORMAT_R32G32B32_SINT,), + fmt(VK_FORMAT_R32G32B32_SFLOAT, ISL_FORMAT_R32G32B32_FLOAT,), + fmt(VK_FORMAT_R32G32B32A32_UINT, ISL_FORMAT_R32G32B32A32_UINT,), + fmt(VK_FORMAT_R32G32B32A32_SINT, ISL_FORMAT_R32G32B32A32_SINT,), + fmt(VK_FORMAT_R32G32B32A32_SFLOAT, ISL_FORMAT_R32G32B32A32_FLOAT,), + fmt(VK_FORMAT_R64_UINT, ISL_FORMAT_R64_PASSTHRU), + fmt(VK_FORMAT_R64_SINT, ISL_FORMAT_R64_PASSTHRU), + fmt(VK_FORMAT_R64_SFLOAT, ISL_FORMAT_R64_FLOAT), + fmt(VK_FORMAT_R64G64_UINT, ISL_FORMAT_R64G64_PASSTHRU), + fmt(VK_FORMAT_R64G64_SINT, ISL_FORMAT_R64G64_PASSTHRU), + fmt(VK_FORMAT_R64G64_SFLOAT, ISL_FORMAT_R64G64_FLOAT), + fmt(VK_FORMAT_R64G64B64_UINT, ISL_FORMAT_R64G64B64_PASSTHRU), + fmt(VK_FORMAT_R64G64B64_SINT, ISL_FORMAT_R64G64B64_PASSTHRU), + fmt(VK_FORMAT_R64G64B64_SFLOAT, ISL_FORMAT_R64G64B64_FLOAT), + fmt(VK_FORMAT_R64G64B64A64_UINT, ISL_FORMAT_R64G64B64A64_PASSTHRU), + fmt(VK_FORMAT_R64G64B64A64_SINT, ISL_FORMAT_R64G64B64A64_PASSTHRU), + fmt(VK_FORMAT_R64G64B64A64_SFLOAT, ISL_FORMAT_R64G64B64A64_FLOAT), + fmt(VK_FORMAT_B10G11R11_UFLOAT_PACK32, ISL_FORMAT_R11G11B10_FLOAT), + fmt(VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, ISL_FORMAT_R9G9B9E5_SHAREDEXP), + + fmt(VK_FORMAT_D16_UNORM, ISL_FORMAT_R16_UNORM, 
.has_depth = true), + fmt(VK_FORMAT_X8_D24_UNORM_PACK32, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .has_depth = true), + fmt(VK_FORMAT_D32_SFLOAT, ISL_FORMAT_R32_FLOAT, .has_depth = true), + fmt(VK_FORMAT_S8_UINT, ISL_FORMAT_R8_UINT, .has_stencil = true), + fmt(VK_FORMAT_D16_UNORM_S8_UINT, ISL_FORMAT_R16_UNORM, .has_depth = true, .has_stencil = true), + fmt(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .has_depth = true, .has_stencil = true), + fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT, .has_depth = true, .has_stencil = true), + + fmt(VK_FORMAT_BC1_RGB_UNORM_BLOCK, ISL_FORMAT_DXT1_RGB), + fmt(VK_FORMAT_BC1_RGB_SRGB_BLOCK, ISL_FORMAT_DXT1_RGB_SRGB), + fmt(VK_FORMAT_BC1_RGBA_UNORM_BLOCK, ISL_FORMAT_BC1_UNORM), + fmt(VK_FORMAT_BC1_RGBA_SRGB_BLOCK, ISL_FORMAT_BC1_UNORM_SRGB), + fmt(VK_FORMAT_BC2_UNORM_BLOCK, ISL_FORMAT_BC2_UNORM), + fmt(VK_FORMAT_BC2_SRGB_BLOCK, ISL_FORMAT_BC2_UNORM_SRGB), + fmt(VK_FORMAT_BC3_UNORM_BLOCK, ISL_FORMAT_BC3_UNORM), + fmt(VK_FORMAT_BC3_SRGB_BLOCK, ISL_FORMAT_BC3_UNORM_SRGB), + fmt(VK_FORMAT_BC4_UNORM_BLOCK, ISL_FORMAT_BC4_UNORM), + fmt(VK_FORMAT_BC4_SNORM_BLOCK, ISL_FORMAT_BC4_SNORM), + fmt(VK_FORMAT_BC5_UNORM_BLOCK, ISL_FORMAT_BC5_UNORM), + fmt(VK_FORMAT_BC5_SNORM_BLOCK, ISL_FORMAT_BC5_SNORM), + fmt(VK_FORMAT_BC6H_UFLOAT_BLOCK, ISL_FORMAT_BC6H_UF16), + fmt(VK_FORMAT_BC6H_SFLOAT_BLOCK, ISL_FORMAT_BC6H_SF16), + fmt(VK_FORMAT_BC7_UNORM_BLOCK, ISL_FORMAT_BC7_UNORM), + fmt(VK_FORMAT_BC7_SRGB_BLOCK, ISL_FORMAT_BC7_UNORM_SRGB), + fmt(VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, ISL_FORMAT_ETC2_RGB8), + fmt(VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK, ISL_FORMAT_ETC2_SRGB8), + fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, ISL_FORMAT_ETC2_RGB8_PTA), + fmt(VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK, ISL_FORMAT_ETC2_SRGB8_PTA), + fmt(VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, ISL_FORMAT_ETC2_EAC_RGBA8), + fmt(VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK, ISL_FORMAT_ETC2_EAC_SRGB8_A8), + fmt(VK_FORMAT_EAC_R11_UNORM_BLOCK, ISL_FORMAT_EAC_R11), + fmt(VK_FORMAT_EAC_R11_SNORM_BLOCK, ISL_FORMAT_EAC_SIGNED_R11), + fmt(VK_FORMAT_EAC_R11G11_UNORM_BLOCK, ISL_FORMAT_EAC_RG11), + fmt(VK_FORMAT_EAC_R11G11_SNORM_BLOCK, ISL_FORMAT_EAC_SIGNED_RG11), + fmt(VK_FORMAT_ASTC_4x4_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_4x4_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x4_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x4_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x8_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x8_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x8_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x8_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + 
fmt(VK_FORMAT_ASTC_10x10_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x10_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x10_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x10_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x12_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_ASTC_12x12_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_UNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_USCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SSCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_UINT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SINT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8_SRGB, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_UNORM, ISL_FORMAT_B8G8R8A8_UNORM), + fmt(VK_FORMAT_B8G8R8A8_SNORM, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_USCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SSCALED, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_UINT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SINT, ISL_FORMAT_UNSUPPORTED), + fmt(VK_FORMAT_B8G8R8A8_SRGB, ISL_FORMAT_B8G8R8A8_UNORM_SRGB), +}; + +#undef fmt + +const struct anv_format * +anv_format_for_vk_format(VkFormat format) +{ + return &anv_formats[format]; +} + +/** + * Exactly one bit must be set in \a aspect. + */ +enum isl_format +anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect, + VkImageTiling tiling, struct anv_format_swizzle *swizzle) +{ + const struct anv_format *anv_fmt = &anv_formats[format]; + + if (swizzle) + *swizzle = anv_fmt->swizzle; + + switch (aspect) { + case VK_IMAGE_ASPECT_COLOR_BIT: + if (anv_fmt->isl_format == ISL_FORMAT_UNSUPPORTED) { + return ISL_FORMAT_UNSUPPORTED; + } else if (tiling == VK_IMAGE_TILING_OPTIMAL && + !util_is_power_of_two(anv_fmt->isl_layout->bs)) { + /* Tiled formats *must* be power-of-two because we need up upload + * them with the render pipeline. For 3-channel formats, we fix + * this by switching them over to RGBX or RGBA formats under the + * hood. 
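+ * For example, a 12-byte-per-texel RGB32 format would be backed by its
+ * 16-byte RGBX32 counterpart when ISL has one, and by the corresponding
+ * RGBA32 format otherwise.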
+ */ + enum isl_format rgbx = isl_format_rgb_to_rgbx(anv_fmt->isl_format); + if (rgbx != ISL_FORMAT_UNSUPPORTED) + return rgbx; + else + return isl_format_rgb_to_rgba(anv_fmt->isl_format); + } else { + return anv_fmt->isl_format; + } + + case VK_IMAGE_ASPECT_DEPTH_BIT: + case (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT): + assert(anv_fmt->has_depth); + return anv_fmt->isl_format; + + case VK_IMAGE_ASPECT_STENCIL_BIT: + assert(anv_fmt->has_stencil); + return ISL_FORMAT_R8_UINT; + + default: + unreachable("bad VkImageAspect"); + return ISL_FORMAT_UNSUPPORTED; + } +} + +// Format capabilities + +void anv_validate_GetPhysicalDeviceFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat _format, + VkFormatProperties* pFormatProperties) +{ + const struct anv_format *format = anv_format_for_vk_format(_format); + fprintf(stderr, "vkGetFormatProperties(%s)\n", format->name); + anv_GetPhysicalDeviceFormatProperties(physicalDevice, _format, pFormatProperties); +} + +static VkFormatFeatureFlags +get_image_format_properties(int gen, enum isl_format base, + enum isl_format actual, + struct anv_format_swizzle swizzle) +{ + const struct brw_surface_format_info *info = &surface_formats[actual]; + + if (actual == ISL_FORMAT_UNSUPPORTED || !info->exists) + return 0; + + VkFormatFeatureFlags flags = 0; + if (info->sampling <= gen) { + flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | + VK_FORMAT_FEATURE_BLIT_SRC_BIT; + + if (info->filtering <= gen) + flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; + } + + /* We can render to swizzled formats. However, if the alpha channel is + * moved, then blending won't work correctly. The PRM tells us + * straight-up not to render to such a surface. + */ + if (info->render_target <= gen && swizzle.a == 3) { + flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_BLIT_DST_BIT; + } + + if (info->alpha_blend <= gen && swizzle.a == 3) + flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; + + /* Load/store is determined based on base format. This prevents RGB + * formats from showing up as load/store capable. 
+ */ + if (isl_is_storage_image_format(base)) + flags |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; + + if (base == ISL_FORMAT_R32_SINT || base == ISL_FORMAT_R32_UINT) + flags |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT; + + return flags; +} + +static VkFormatFeatureFlags +get_buffer_format_properties(int gen, enum isl_format format) +{ + const struct brw_surface_format_info *info = &surface_formats[format]; + + if (format == ISL_FORMAT_UNSUPPORTED || !info->exists) + return 0; + + VkFormatFeatureFlags flags = 0; + if (info->sampling <= gen && !isl_format_is_compressed(format)) + flags |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; + + if (info->input_vb <= gen) + flags |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; + + if (isl_is_storage_image_format(format)) + flags |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT; + + if (format == ISL_FORMAT_R32_SINT || format == ISL_FORMAT_R32_UINT) + flags |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT; + + return flags; +} + +static void +anv_physical_device_get_format_properties(struct anv_physical_device *physical_device, + VkFormat format, + VkFormatProperties *out_properties) +{ + int gen = physical_device->info->gen * 10; + if (physical_device->info->is_haswell) + gen += 5; + + VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0; + if (anv_format_is_depth_or_stencil(&anv_formats[format])) { + tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; + if (physical_device->info->gen >= 8) { + tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT; + } + if (anv_formats[format].has_depth) { + tiled |= VK_FORMAT_FEATURE_BLIT_DST_BIT; + } + } else { + enum isl_format linear_fmt, tiled_fmt; + struct anv_format_swizzle linear_swizzle, tiled_swizzle; + linear_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_TILING_LINEAR, &linear_swizzle); + tiled_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_TILING_OPTIMAL, &tiled_swizzle); + + linear = get_image_format_properties(gen, linear_fmt, linear_fmt, + linear_swizzle); + tiled = get_image_format_properties(gen, linear_fmt, tiled_fmt, + tiled_swizzle); + buffer = get_buffer_format_properties(gen, linear_fmt); + + /* XXX: We handle 3-channel formats by switching them out for RGBX or + * RGBA formats behind-the-scenes. This works fine for textures + * because the upload process will fill in the extra channel. + * We could also support it for render targets, but it will take + * substantially more work and we have enough RGBX formats to handle + * what most clients will want. 
+ */ + if (linear_fmt != ISL_FORMAT_UNSUPPORTED && + !util_is_power_of_two(isl_format_layouts[linear_fmt].bs) && + isl_format_rgb_to_rgbx(linear_fmt) == ISL_FORMAT_UNSUPPORTED) { + tiled &= ~VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT & + ~VK_FORMAT_FEATURE_BLIT_DST_BIT; + } + } + + out_properties->linearTilingFeatures = linear; + out_properties->optimalTilingFeatures = tiled; + out_properties->bufferFeatures = buffer; + + return; +} + + +void anv_GetPhysicalDeviceFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkFormatProperties* pFormatProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + + anv_physical_device_get_format_properties( + physical_device, + format, + pFormatProperties); +} + +VkResult anv_GetPhysicalDeviceImageFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + VkImageTiling tiling, + VkImageUsageFlags usage, + VkImageCreateFlags createFlags, + VkImageFormatProperties* pImageFormatProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + VkFormatProperties format_props; + VkFormatFeatureFlags format_feature_flags; + VkExtent3D maxExtent; + uint32_t maxMipLevels; + uint32_t maxArraySize; + VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT; + + anv_physical_device_get_format_properties(physical_device, format, + &format_props); + + /* Extract the VkFormatFeatureFlags that are relevant for the queried + * tiling. + */ + if (tiling == VK_IMAGE_TILING_LINEAR) { + format_feature_flags = format_props.linearTilingFeatures; + } else if (tiling == VK_IMAGE_TILING_OPTIMAL) { + format_feature_flags = format_props.optimalTilingFeatures; + } else { + unreachable("bad VkImageTiling"); + } + + switch (type) { + default: + unreachable("bad VkImageType"); + case VK_IMAGE_TYPE_1D: + maxExtent.width = 16384; + maxExtent.height = 1; + maxExtent.depth = 1; + maxMipLevels = 15; /* log2(maxWidth) + 1 */ + maxArraySize = 2048; + sampleCounts = VK_SAMPLE_COUNT_1_BIT; + break; + case VK_IMAGE_TYPE_2D: + /* FINISHME: Does this really differ for cube maps? The documentation + * for RENDER_SURFACE_STATE suggests so. + */ + maxExtent.width = 16384; + maxExtent.height = 16384; + maxExtent.depth = 1; + maxMipLevels = 15; /* log2(maxWidth) + 1 */ + maxArraySize = 2048; + break; + case VK_IMAGE_TYPE_3D: + maxExtent.width = 2048; + maxExtent.height = 2048; + maxExtent.depth = 2048; + maxMipLevels = 12; /* log2(maxWidth) + 1 */ + maxArraySize = 1; + break; + } + + if (tiling == VK_IMAGE_TILING_OPTIMAL && + type == VK_IMAGE_TYPE_2D && + (format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) && + !(createFlags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && + !(usage & VK_IMAGE_USAGE_STORAGE_BIT)) { + sampleCounts = isl_device_get_sample_counts(&physical_device->isl_dev); + } + + if (usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { + /* Meta implements transfers by sampling from the source image. */ + if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + goto unsupported; + } + } + +#if 0 + if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { + if (anv_format_for_vk_format(format)->has_stencil) { + /* Not yet implemented because copying to a W-tiled surface is crazy + * hard. 
+ */ + anv_finishme("support VK_IMAGE_USAGE_TRANSFER_DST_BIT for " + "stencil format"); + goto unsupported; + } + } +#endif + + if (usage & VK_IMAGE_USAGE_SAMPLED_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + goto unsupported; + } + } + + if (usage & VK_IMAGE_USAGE_STORAGE_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) { + goto unsupported; + } + } + + if (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) { + goto unsupported; + } + } + + if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) { + goto unsupported; + } + } + + if (usage & VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT) { + /* Nothing to check. */ + } + + if (usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) { + /* Ignore this flag because it was removed from the + * provisional_I_20150910 header. + */ + } + + *pImageFormatProperties = (VkImageFormatProperties) { + .maxExtent = maxExtent, + .maxMipLevels = maxMipLevels, + .maxArrayLayers = maxArraySize, + .sampleCounts = sampleCounts, + + /* FINISHME: Accurately calculate + * VkImageFormatProperties::maxResourceSize. + */ + .maxResourceSize = UINT32_MAX, + }; + + return VK_SUCCESS; + +unsupported: + *pImageFormatProperties = (VkImageFormatProperties) { + .maxExtent = { 0, 0, 0 }, + .maxMipLevels = 0, + .maxArrayLayers = 0, + .sampleCounts = 0, + .maxResourceSize = 0, + }; + + return VK_SUCCESS; +} + +void anv_GetPhysicalDeviceSparseImageFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + uint32_t samples, + VkImageUsageFlags usage, + VkImageTiling tiling, + uint32_t* pNumProperties, + VkSparseImageFormatProperties* pProperties) +{ + /* Sparse images are not yet supported. */ + *pNumProperties = 0; +} diff --git a/src/intel/vulkan/anv_gem.c b/src/intel/vulkan/anv_gem.c new file mode 100644 index 0000000..0a7be35 --- /dev/null +++ b/src/intel/vulkan/anv_gem.c @@ -0,0 +1,358 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#define _DEFAULT_SOURCE + +#include +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s))) + +static int +anv_ioctl(int fd, unsigned long request, void *arg) +{ + int ret; + + do { + ret = ioctl(fd, request, arg); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + return ret; +} + +/** + * Wrapper around DRM_IOCTL_I915_GEM_CREATE. + * + * Return gem handle, or 0 on failure. Gem handles are never 0. + */ +uint32_t +anv_gem_create(struct anv_device *device, size_t size) +{ + struct drm_i915_gem_create gem_create; + int ret; + + VG_CLEAR(gem_create); + gem_create.size = size; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create); + if (ret != 0) { + /* FIXME: What do we do if this fails? */ + return 0; + } + + return gem_create.handle; +} + +void +anv_gem_close(struct anv_device *device, uint32_t gem_handle) +{ + struct drm_gem_close close; + + VG_CLEAR(close); + close.handle = gem_handle; + anv_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &close); +} + +/** + * Wrapper around DRM_IOCTL_I915_GEM_MMAP. + */ +void* +anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, + uint64_t offset, uint64_t size, uint32_t flags) +{ + struct drm_i915_gem_mmap gem_mmap; + int ret; + + gem_mmap.handle = gem_handle; + VG_CLEAR(gem_mmap.pad); + gem_mmap.offset = offset; + gem_mmap.size = size; + VG_CLEAR(gem_mmap.addr_ptr); + gem_mmap.flags = flags; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap); + if (ret != 0) { + /* FIXME: Is NULL the right error return? Cf MAP_INVALID */ + return NULL; + } + + VG(VALGRIND_MALLOCLIKE_BLOCK(gem_mmap.addr_ptr, gem_mmap.size, 0, 1)); + return (void *)(uintptr_t) gem_mmap.addr_ptr; +} + +/* This is just a wrapper around munmap, but it also notifies valgrind that + * this map is no longer valid. Pair this with anv_gem_mmap(). + */ +void +anv_gem_munmap(void *p, uint64_t size) +{ + VG(VALGRIND_FREELIKE_BLOCK(p, 0)); + munmap(p, size); +} + +uint32_t +anv_gem_userptr(struct anv_device *device, void *mem, size_t size) +{ + struct drm_i915_gem_userptr userptr; + int ret; + + VG_CLEAR(userptr); + userptr.user_ptr = (__u64)((unsigned long) mem); + userptr.user_size = size; + userptr.flags = 0; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); + if (ret == -1) + return 0; + + return userptr.handle; +} + +int +anv_gem_set_caching(struct anv_device *device, + uint32_t gem_handle, uint32_t caching) +{ + struct drm_i915_gem_caching gem_caching; + + VG_CLEAR(gem_caching); + gem_caching.handle = gem_handle; + gem_caching.caching = caching; + + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &gem_caching); +} + +int +anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, + uint32_t read_domains, uint32_t write_domain) +{ + struct drm_i915_gem_set_domain gem_set_domain; + + VG_CLEAR(gem_set_domain); + gem_set_domain.handle = gem_handle; + gem_set_domain.read_domains = read_domains; + gem_set_domain.write_domain = write_domain; + + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &gem_set_domain); +} + +/** + * On error, \a timeout_ns holds the remaining time. 
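+ *
+ * DRM_IOCTL_I915_GEM_WAIT takes a signed 64-bit timeout, so callers such as
+ * anv_WaitForFences() clamp Vulkan's unsigned 64-bit timeout to INT64_MAX
+ * nanoseconds (roughly 292 years) first. A typical call is:
+ *
+ *   int64_t t = timeout;
+ *   int ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
+ *   if (ret == -1 && errno == ETIME)
+ *      return VK_TIMEOUT;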
+ */ +int +anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns) +{ + struct drm_i915_gem_wait wait; + int ret; + + VG_CLEAR(wait); + wait.bo_handle = gem_handle; + wait.timeout_ns = *timeout_ns; + wait.flags = 0; + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); + *timeout_ns = wait.timeout_ns; + + return ret; +} + +int +anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf) +{ + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); +} + +int +anv_gem_set_tiling(struct anv_device *device, + uint32_t gem_handle, uint32_t stride, uint32_t tiling) +{ + struct drm_i915_gem_set_tiling set_tiling; + int ret; + + /* set_tiling overwrites the input on the error path, so we have to open + * code anv_ioctl. + */ + + do { + VG_CLEAR(set_tiling); + set_tiling.handle = gem_handle; + set_tiling.tiling_mode = tiling; + set_tiling.stride = stride; + + ret = ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + return ret; +} + +int +anv_gem_get_param(int fd, uint32_t param) +{ + drm_i915_getparam_t gp; + int ret, tmp; + + VG_CLEAR(gp); + gp.param = param; + gp.value = &tmp; + ret = anv_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret == 0) + return tmp; + + return 0; +} + +bool +anv_gem_get_bit6_swizzle(int fd, uint32_t tiling) +{ + struct drm_gem_close close; + int ret; + + struct drm_i915_gem_create gem_create; + VG_CLEAR(gem_create); + gem_create.size = 4096; + + if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create)) { + assert(!"Failed to create GEM BO"); + return false; + } + + bool swizzled = false; + + /* set_tiling overwrites the input on the error path, so we have to open + * code anv_ioctl. + */ + struct drm_i915_gem_set_tiling set_tiling; + do { + VG_CLEAR(set_tiling); + set_tiling.handle = gem_create.handle; + set_tiling.tiling_mode = tiling; + set_tiling.stride = tiling == I915_TILING_X ? 
512 : 128; + + ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + + if (ret != 0) { + assert(!"Failed to set BO tiling"); + goto close_and_return; + } + + struct drm_i915_gem_get_tiling get_tiling; + VG_CLEAR(get_tiling); + get_tiling.handle = gem_create.handle; + + if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling)) { + assert(!"Failed to get BO tiling"); + goto close_and_return; + } + + swizzled = get_tiling.swizzle_mode != I915_BIT_6_SWIZZLE_NONE; + +close_and_return: + + VG_CLEAR(close); + close.handle = gem_create.handle; + anv_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close); + + return swizzled; +} + +int +anv_gem_create_context(struct anv_device *device) +{ + struct drm_i915_gem_context_create create; + int ret; + + VG_CLEAR(create); + + ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); + if (ret == -1) + return -1; + + return create.ctx_id; +} + +int +anv_gem_destroy_context(struct anv_device *device, int context) +{ + struct drm_i915_gem_context_destroy destroy; + + VG_CLEAR(destroy); + destroy.ctx_id = context; + + return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy); +} + +int +anv_gem_get_aperture(int fd, uint64_t *size) +{ + struct drm_i915_gem_get_aperture aperture; + int ret; + + VG_CLEAR(aperture); + ret = anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); + if (ret == -1) + return -1; + + *size = aperture.aper_available_size; + + return 0; +} + +int +anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle) +{ + struct drm_prime_handle args; + int ret; + + VG_CLEAR(args); + args.handle = gem_handle; + args.flags = DRM_CLOEXEC; + + ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); + if (ret == -1) + return -1; + + return args.fd; +} + +uint32_t +anv_gem_fd_to_handle(struct anv_device *device, int fd) +{ + struct drm_prime_handle args; + int ret; + + VG_CLEAR(args); + args.fd = fd; + + ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args); + if (ret == -1) + return 0; + + return args.handle; +} diff --git a/src/intel/vulkan/anv_gem_stubs.c b/src/intel/vulkan/anv_gem_stubs.c new file mode 100644 index 0000000..3204fef --- /dev/null +++ b/src/intel/vulkan/anv_gem_stubs.c @@ -0,0 +1,159 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#define _DEFAULT_SOURCE + +#include +#include +#include + +#include "anv_private.h" + +static inline int +memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +uint32_t +anv_gem_create(struct anv_device *device, size_t size) +{ + int fd = memfd_create("fake bo", MFD_CLOEXEC); + if (fd == -1) + return 0; + + assert(fd != 0); + + if (ftruncate(fd, size) == -1) + return 0; + + return fd; +} + +void +anv_gem_close(struct anv_device *device, uint32_t gem_handle) +{ + close(gem_handle); +} + +void* +anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, + uint64_t offset, uint64_t size, uint32_t flags) +{ + /* Ignore flags, as they're specific to I915_GEM_MMAP. */ + (void) flags; + + return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, + gem_handle, offset); +} + +/* This is just a wrapper around munmap, but it also notifies valgrind that + * this map is no longer valid. Pair this with anv_gem_mmap(). + */ +void +anv_gem_munmap(void *p, uint64_t size) +{ + munmap(p, size); +} + +uint32_t +anv_gem_userptr(struct anv_device *device, void *mem, size_t size) +{ + return -1; +} + +int +anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns) +{ + return 0; +} + +int +anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf) +{ + return 0; +} + +int +anv_gem_set_tiling(struct anv_device *device, + uint32_t gem_handle, uint32_t stride, uint32_t tiling) +{ + return 0; +} + +int +anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, + uint32_t caching) +{ + return 0; +} + +int +anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, + uint32_t read_domains, uint32_t write_domain) +{ + return 0; +} + +int +anv_gem_get_param(int fd, uint32_t param) +{ + unreachable("Unused"); +} + +bool +anv_gem_get_bit6_swizzle(int fd, uint32_t tiling) +{ + unreachable("Unused"); +} + +int +anv_gem_create_context(struct anv_device *device) +{ + unreachable("Unused"); +} + +int +anv_gem_destroy_context(struct anv_device *device, int context) +{ + unreachable("Unused"); +} + +int +anv_gem_get_aperture(int fd, uint64_t *size) +{ + unreachable("Unused"); +} + +int +anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle) +{ + unreachable("Unused"); +} + +uint32_t +anv_gem_fd_to_handle(struct anv_device *device, int fd) +{ + unreachable("Unused"); +} diff --git a/src/intel/vulkan/anv_gen_macros.h b/src/intel/vulkan/anv_gen_macros.h new file mode 100644 index 0000000..ef2ecd5 --- /dev/null +++ b/src/intel/vulkan/anv_gen_macros.h @@ -0,0 +1,146 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +/* Macros for handling per-gen compilation. + * + * The prefixing macros GENX() and genX() automatically prefix whatever you + * give them by GENX_ or genX_ where X is the gen number. + * + * You can declare a function to be used on some range of gens like this: + * + * GENX_FUNC(GEN7, GEN75) void + * genX(my_function_name)(args...) + * { + * // Do stuff + * } + * + * If the file is compiled for any set of gens containing gen7 and gen75, + * the function will effectively only get compiled twice as + * gen7_my_function_name and gen75_my_function_name. The function has to + * be compilable on all gens, but it will become a static inline that gets + * discarded by the compiler on all gens not in range. + * + * You can do pseudo-runtime checks in your function such as + * + * if (ANV_GEN > 8 || ANV_IS_HASWELL) { + * // Do something + * } + * + * The contents of the if statement must be valid regardless of gen, but + * the if will get compiled away on everything except haswell. + * + * For places where you really do have a compile-time conflict, you can + * use preprocessor logic: + * + * #if (ANV_GEN > 8 || ANV_IS_HASWELL) + * // Do something + * #endif + * + * However, it is strongly recommended that the former be used whenever + * possible. + */ + +/* Base macro defined on the command line. If we don't have this, we can't + * do anything. + */ +#ifdef ANV_GENx10 + +/* Gen checking macros */ +#define ANV_GEN ((ANV_GENx10) / 10) +#define ANV_IS_HASWELL ((ANV_GENx10) == 75) + +/* Prefixing macros */ +#if (ANV_GENx10 == 70) +# define GENX(X) GEN7_##X +# define genX(x) gen7_##x +#elif (ANV_GENx10 == 75) +# define GENX(X) GEN75_##X +# define genX(x) gen75_##x +#elif (ANV_GENx10 == 80) +# define GENX(X) GEN8_##X +# define genX(x) gen8_##x +#elif (ANV_GENx10 == 90) +# define GENX(X) GEN9_##X +# define genX(x) gen9_##x +#else +# error "Need to add prefixing macros for your gen" +#endif + +/* Macros for comparing gens */ +#if (ANV_GENx10 >= 70) +#define __ANV_GEN_GE_GEN7(T, F) T +#else +#define __ANV_GEN_GE_GEN7(T, F) F +#endif + +#if (ANV_GENx10 <= 70) +#define __ANV_GEN_LE_GEN7(T, F) T +#else +#define __ANV_GEN_LE_GEN7(T, F) F +#endif + +#if (ANV_GENx10 >= 75) +#define __ANV_GEN_GE_GEN75(T, F) T +#else +#define __ANV_GEN_GE_GEN75(T, F) F +#endif + +#if (ANV_GENx10 <= 75) +#define __ANV_GEN_LE_GEN75(T, F) T +#else +#define __ANV_GEN_LE_GEN75(T, F) F +#endif + +#if (ANV_GENx10 >= 80) +#define __ANV_GEN_GE_GEN8(T, F) T +#else +#define __ANV_GEN_GE_GEN8(T, F) F +#endif + +#if (ANV_GENx10 <= 80) +#define __ANV_GEN_LE_GEN8(T, F) T +#else +#define __ANV_GEN_LE_GEN8(T, F) F +#endif + +#if (ANV_GENx10 >= 90) +#define __ANV_GEN_GE_GEN9(T, F) T +#else +#define __ANV_GEN_GE_GEN9(T, F) F +#endif + +#if (ANV_GENx10 <= 90) +#define __ANV_GEN_LE_GEN9(T, F) T +#else +#define __ANV_GEN_LE_GEN9(T, F) F +#endif + +#define __ANV_GEN_IN_RANGE(start, end, T, F) \ + __ANV_GEN_GE_##start(__ANV_GEN_LE_##end(T, F), F) + +/* Declares a function as static inline if it's not in range */ +#define GENX_FUNC(start, end) __ANV_GEN_IN_RANGE(start, end, , static inline) + +#endif /* ANV_GENx10 */ diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c new file mode 100644 index 0000000..0a412a3 --- /dev/null +++
b/src/intel/vulkan/anv_image.c @@ -0,0 +1,911 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +/** + * Exactly one bit must be set in \a aspect. + */ +static isl_surf_usage_flags_t +choose_isl_surf_usage(VkImageUsageFlags vk_usage, + VkImageAspectFlags aspect) +{ + isl_surf_usage_flags_t isl_usage = 0; + + /* FINISHME: Support aux surfaces */ + isl_usage |= ISL_SURF_USAGE_DISABLE_AUX_BIT; + + if (vk_usage & VK_IMAGE_USAGE_SAMPLED_BIT) + isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT; + + if (vk_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) + isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT; + + if (vk_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) + isl_usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT; + + if (vk_usage & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) + isl_usage |= ISL_SURF_USAGE_CUBE_BIT; + + if (vk_usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + switch (aspect) { + default: + unreachable("bad VkImageAspect"); + case VK_IMAGE_ASPECT_DEPTH_BIT: + isl_usage |= ISL_SURF_USAGE_DEPTH_BIT; + break; + case VK_IMAGE_ASPECT_STENCIL_BIT: + isl_usage |= ISL_SURF_USAGE_STENCIL_BIT; + break; + } + } + + if (vk_usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { + /* Meta implements transfers by sampling from the source image. */ + isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT; + } + + if (vk_usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { + /* Meta implements transfers by rendering into the destination image. */ + isl_usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT; + } + + return isl_usage; +} + +/** + * Exactly one bit must be set in \a aspect. + */ +static struct anv_surface * +get_surface(struct anv_image *image, VkImageAspectFlags aspect) +{ + switch (aspect) { + default: + unreachable("bad VkImageAspect"); + case VK_IMAGE_ASPECT_COLOR_BIT: + return &image->color_surface; + case VK_IMAGE_ASPECT_DEPTH_BIT: + return &image->depth_surface; + case VK_IMAGE_ASPECT_STENCIL_BIT: + return &image->stencil_surface; + } +} + +/** + * Initialize the anv_image::*_surface selected by \a aspect. Then update the + * image's memory requirements (that is, the image's size and alignment). + * + * Exactly one bit must be set in \a aspect. 
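+ *
+ * For a combined depth/stencil image this is called once per aspect, and each
+ * call appends its surface to the image. Illustratively, the depth surface
+ * lands at offset 0 and the stencil surface at
+ * align_u32(image->size, anv_surf->isl.alignment), with image->alignment
+ * raised to the larger of the two surface alignments.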
+ */ +static VkResult +make_surface(const struct anv_device *dev, + struct anv_image *image, + const struct anv_image_create_info *anv_info, + VkImageAspectFlags aspect) +{ + const VkImageCreateInfo *vk_info = anv_info->vk_info; + bool ok UNUSED; + + static const enum isl_surf_dim vk_to_isl_surf_dim[] = { + [VK_IMAGE_TYPE_1D] = ISL_SURF_DIM_1D, + [VK_IMAGE_TYPE_2D] = ISL_SURF_DIM_2D, + [VK_IMAGE_TYPE_3D] = ISL_SURF_DIM_3D, + }; + + isl_tiling_flags_t tiling_flags = anv_info->isl_tiling_flags; + if (vk_info->tiling == VK_IMAGE_TILING_LINEAR) + tiling_flags &= ISL_TILING_LINEAR_BIT; + + struct anv_surface *anv_surf = get_surface(image, aspect); + + VkExtent3D extent; + switch (vk_info->imageType) { + case VK_IMAGE_TYPE_1D: + extent = (VkExtent3D) { vk_info->extent.width, 1, 1 }; + break; + case VK_IMAGE_TYPE_2D: + extent = (VkExtent3D) { vk_info->extent.width, vk_info->extent.height, 1 }; + break; + case VK_IMAGE_TYPE_3D: + extent = vk_info->extent; + break; + default: + unreachable("invalid image type"); + } + + image->extent = extent; + + ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl, + .dim = vk_to_isl_surf_dim[vk_info->imageType], + .format = anv_get_isl_format(vk_info->format, aspect, + vk_info->tiling, NULL), + .width = extent.width, + .height = extent.height, + .depth = extent.depth, + .levels = vk_info->mipLevels, + .array_len = vk_info->arrayLayers, + .samples = vk_info->samples, + .min_alignment = 0, + .min_pitch = 0, + .usage = choose_isl_surf_usage(image->usage, aspect), + .tiling_flags = tiling_flags); + + /* isl_surf_init() will fail only if provided invalid input. Invalid input + * is illegal in Vulkan. + */ + assert(ok); + + anv_surf->offset = align_u32(image->size, anv_surf->isl.alignment); + image->size = anv_surf->offset + anv_surf->isl.size; + image->alignment = MAX(image->alignment, anv_surf->isl.alignment); + + return VK_SUCCESS; +} + +/** + * Parameter @a format is required and overrides VkImageCreateInfo::format. + */ +static VkImageUsageFlags +anv_image_get_full_usage(const VkImageCreateInfo *info, + const struct anv_format *format) +{ + VkImageUsageFlags usage = info->usage; + + if (info->samples > 1 && + (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) { + /* Meta will resolve the image by binding it as a texture. */ + usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + } + + if (usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { + /* Meta will transfer from the image by binding it as a texture. */ + usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + } + + if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { + /* For non-clear transfer operations, meta will transfer to the image by + * binding it as a color attachment, even if the image format is not + * a color format. + */ + usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + if (anv_format_is_depth_or_stencil(format)) { + /* vkCmdClearDepthStencilImage() only requires that + * VK_IMAGE_USAGE_TRANSFER_DST_BIT be set. In particular, it does + * not require VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT. Meta + * clears the image, though, by binding it as a depthstencil + * attachment.
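+ *
+ * As an illustration, a VK_FORMAT_D32_SFLOAT image created with only
+ * VK_IMAGE_USAGE_TRANSFER_DST_BIT therefore leaves this function with
+ * TRANSFER_DST | COLOR_ATTACHMENT | DEPTH_STENCIL_ATTACHMENT set.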
+ */ + usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + } + } + + return usage; +} + +VkResult +anv_image_create(VkDevice _device, + const struct anv_image_create_info *create_info, + const VkAllocationCallbacks* alloc, + VkImage *pImage) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + const VkImageCreateInfo *pCreateInfo = create_info->vk_info; + struct anv_image *image = NULL; + const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); + VkResult r; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); + + anv_assert(pCreateInfo->mipLevels > 0); + anv_assert(pCreateInfo->arrayLayers > 0); + anv_assert(pCreateInfo->samples > 0); + anv_assert(pCreateInfo->extent.width > 0); + anv_assert(pCreateInfo->extent.height > 0); + anv_assert(pCreateInfo->extent.depth > 0); + + image = anv_alloc2(&device->alloc, alloc, sizeof(*image), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!image) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + memset(image, 0, sizeof(*image)); + image->type = pCreateInfo->imageType; + image->extent = pCreateInfo->extent; + image->vk_format = pCreateInfo->format; + image->format = format; + image->levels = pCreateInfo->mipLevels; + image->array_size = pCreateInfo->arrayLayers; + image->samples = pCreateInfo->samples; + image->usage = anv_image_get_full_usage(pCreateInfo, format); + image->tiling = pCreateInfo->tiling; + + if (likely(anv_format_is_color(format))) { + r = make_surface(device, image, create_info, + VK_IMAGE_ASPECT_COLOR_BIT); + if (r != VK_SUCCESS) + goto fail; + } else { + if (image->format->has_depth) { + r = make_surface(device, image, create_info, + VK_IMAGE_ASPECT_DEPTH_BIT); + if (r != VK_SUCCESS) + goto fail; + } + + if (image->format->has_stencil) { + r = make_surface(device, image, create_info, + VK_IMAGE_ASPECT_STENCIL_BIT); + if (r != VK_SUCCESS) + goto fail; + } + } + + *pImage = anv_image_to_handle(image); + + return VK_SUCCESS; + +fail: + if (image) + anv_free2(&device->alloc, alloc, image); + + return r; +} + +VkResult +anv_CreateImage(VkDevice device, + const VkImageCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImage *pImage) +{ + return anv_image_create(device, + &(struct anv_image_create_info) { + .vk_info = pCreateInfo, + .isl_tiling_flags = ISL_TILING_ANY_MASK, + }, + pAllocator, + pImage); +} + +void +anv_DestroyImage(VkDevice _device, VkImage _image, + const VkAllocationCallbacks *pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + anv_free2(&device->alloc, pAllocator, anv_image_from_handle(_image)); +} + +static void +anv_surface_get_subresource_layout(struct anv_image *image, + struct anv_surface *surface, + const VkImageSubresource *subresource, + VkSubresourceLayout *layout) +{ + /* If we are on a non-zero mip level or array slice, we need to + * calculate a real offset. 
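+ *
+ * For now only the base subresource (mip level 0, array layer 0) is
+ * handled, as the asserts below enforce; handling other subresources would
+ * presumably mean computing the offset with something like
+ * isl_surf_get_image_offset_el() rather than using surface->offset alone.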
+ */ + anv_assert(subresource->mipLevel == 0); + anv_assert(subresource->arrayLayer == 0); + + layout->offset = surface->offset; + layout->rowPitch = surface->isl.row_pitch; + layout->depthPitch = isl_surf_get_array_pitch(&surface->isl); + layout->arrayPitch = isl_surf_get_array_pitch(&surface->isl); + layout->size = surface->isl.size; +} + +void anv_GetImageSubresourceLayout( + VkDevice device, + VkImage _image, + const VkImageSubresource* pSubresource, + VkSubresourceLayout* pLayout) +{ + ANV_FROM_HANDLE(anv_image, image, _image); + + assert(__builtin_popcount(pSubresource->aspectMask) == 1); + + switch (pSubresource->aspectMask) { + case VK_IMAGE_ASPECT_COLOR_BIT: + anv_surface_get_subresource_layout(image, &image->color_surface, + pSubresource, pLayout); + break; + case VK_IMAGE_ASPECT_DEPTH_BIT: + anv_surface_get_subresource_layout(image, &image->depth_surface, + pSubresource, pLayout); + break; + case VK_IMAGE_ASPECT_STENCIL_BIT: + anv_surface_get_subresource_layout(image, &image->stencil_surface, + pSubresource, pLayout); + break; + default: + assert(!"Invalid image aspect"); + } +} + +VkResult +anv_validate_CreateImageView(VkDevice _device, + const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImageView *pView) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + const VkImageSubresourceRange *subresource; + const struct anv_format *view_format_info; + + /* Validate structure type before dereferencing it. */ + assert(pCreateInfo); + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO); + subresource = &pCreateInfo->subresourceRange; + + /* Validate viewType is in range before using it. */ + assert(pCreateInfo->viewType >= VK_IMAGE_VIEW_TYPE_BEGIN_RANGE); + assert(pCreateInfo->viewType <= VK_IMAGE_VIEW_TYPE_END_RANGE); + + /* Validate format is in range before using it. */ + assert(pCreateInfo->format >= VK_FORMAT_BEGIN_RANGE); + assert(pCreateInfo->format <= VK_FORMAT_END_RANGE); + view_format_info = anv_format_for_vk_format(pCreateInfo->format); + + /* Validate channel swizzles. */ + assert(pCreateInfo->components.r >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->components.r <= VK_COMPONENT_SWIZZLE_END_RANGE); + assert(pCreateInfo->components.g >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->components.g <= VK_COMPONENT_SWIZZLE_END_RANGE); + assert(pCreateInfo->components.b >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->components.b <= VK_COMPONENT_SWIZZLE_END_RANGE); + assert(pCreateInfo->components.a >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->components.a <= VK_COMPONENT_SWIZZLE_END_RANGE); + + /* Validate subresource. */ + assert(subresource->aspectMask != 0); + assert(subresource->levelCount > 0); + assert(subresource->layerCount > 0); + assert(subresource->baseMipLevel < image->levels); + assert(subresource->baseMipLevel + subresource->levelCount <= image->levels); + assert(subresource->baseArrayLayer < image->array_size); + assert(subresource->baseArrayLayer + subresource->layerCount <= image->array_size); + assert(pView); + + const VkImageAspectFlags ds_flags = VK_IMAGE_ASPECT_DEPTH_BIT + | VK_IMAGE_ASPECT_STENCIL_BIT; + + /* Validate format. 
*/ + if (subresource->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { + assert(subresource->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); + assert(!image->format->has_depth); + assert(!image->format->has_stencil); + assert(!view_format_info->has_depth); + assert(!view_format_info->has_stencil); + assert(view_format_info->isl_layout->bs == + image->format->isl_layout->bs); + } else if (subresource->aspectMask & ds_flags) { + assert((subresource->aspectMask & ~ds_flags) == 0); + + if (subresource->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { + assert(image->format->has_depth); + assert(view_format_info->has_depth); + assert(view_format_info->isl_layout->bs == + image->format->isl_layout->bs); + } + + if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { + /* FINISHME: Is it legal to have an R8 view of S8? */ + assert(image->format->has_stencil); + assert(view_format_info->has_stencil); + } + } else { + assert(!"bad VkImageSubresourceRange::aspectFlags"); + } + + return anv_CreateImageView(_device, pCreateInfo, pAllocator, pView); +} + +void +anv_fill_image_surface_state(struct anv_device *device, struct anv_state state, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage) +{ + switch (device->info.gen) { + case 7: + if (device->info.is_haswell) + gen75_fill_image_surface_state(device, state.map, iview, + pCreateInfo, usage); + else + gen7_fill_image_surface_state(device, state.map, iview, + pCreateInfo, usage); + break; + case 8: + gen8_fill_image_surface_state(device, state.map, iview, + pCreateInfo, usage); + break; + case 9: + gen9_fill_image_surface_state(device, state.map, iview, + pCreateInfo, usage); + break; + default: + unreachable("unsupported gen\n"); + } + + if (!device->info.has_llc) + anv_state_clflush(state); +} + +static struct anv_state +alloc_surface_state(struct anv_device *device, + struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer) { + return anv_cmd_buffer_alloc_surface_state(cmd_buffer); + } else { + return anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } +} + +static bool +has_matching_storage_typed_format(const struct anv_device *device, + enum isl_format format) +{ + return (isl_format_get_layout(format)->bs <= 4 || + (isl_format_get_layout(format)->bs <= 8 && + (device->info.gen >= 8 || device->info.is_haswell)) || + device->info.gen >= 9); +} + +static VkComponentSwizzle +remap_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component, + struct anv_format_swizzle format_swizzle) +{ + if (swizzle == VK_COMPONENT_SWIZZLE_IDENTITY) + swizzle = component; + + switch (swizzle) { + case VK_COMPONENT_SWIZZLE_ZERO: + return VK_COMPONENT_SWIZZLE_ZERO; + case VK_COMPONENT_SWIZZLE_ONE: + return VK_COMPONENT_SWIZZLE_ONE; + case VK_COMPONENT_SWIZZLE_R: + return VK_COMPONENT_SWIZZLE_R + format_swizzle.r; + case VK_COMPONENT_SWIZZLE_G: + return VK_COMPONENT_SWIZZLE_R + format_swizzle.g; + case VK_COMPONENT_SWIZZLE_B: + return VK_COMPONENT_SWIZZLE_R + format_swizzle.b; + case VK_COMPONENT_SWIZZLE_A: + return VK_COMPONENT_SWIZZLE_R + format_swizzle.a; + default: + unreachable("Invalid swizzle"); + } +} + +void +anv_image_view_init(struct anv_image_view *iview, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer, + uint32_t offset) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + VkImageViewCreateInfo mCreateInfo; + memcpy(&mCreateInfo, pCreateInfo,
sizeof(VkImageViewCreateInfo)); + + assert(range->layerCount > 0); + assert(range->baseMipLevel < image->levels); + assert(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)); + + switch (image->type) { + default: + unreachable("bad VkImageType"); + case VK_IMAGE_TYPE_1D: + case VK_IMAGE_TYPE_2D: + assert(range->baseArrayLayer + range->layerCount - 1 <= image->array_size); + break; + case VK_IMAGE_TYPE_3D: + assert(range->baseArrayLayer + range->layerCount - 1 + <= anv_minify(image->extent.depth, range->baseMipLevel)); + break; + } + + struct anv_surface *surface = + anv_image_get_surface_for_aspect_mask(image, range->aspectMask); + + iview->image = image; + iview->bo = image->bo; + iview->offset = image->offset + surface->offset + offset; + + iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask; + iview->vk_format = pCreateInfo->format; + + struct anv_format_swizzle swizzle; + iview->format = anv_get_isl_format(pCreateInfo->format, iview->aspect_mask, + image->tiling, &swizzle); + iview->swizzle.r = remap_swizzle(pCreateInfo->components.r, + VK_COMPONENT_SWIZZLE_R, swizzle); + iview->swizzle.g = remap_swizzle(pCreateInfo->components.g, + VK_COMPONENT_SWIZZLE_G, swizzle); + iview->swizzle.b = remap_swizzle(pCreateInfo->components.b, + VK_COMPONENT_SWIZZLE_B, swizzle); + iview->swizzle.a = remap_swizzle(pCreateInfo->components.a, + VK_COMPONENT_SWIZZLE_A, swizzle); + + iview->base_layer = range->baseArrayLayer; + iview->base_mip = range->baseMipLevel; + + if (!isl_format_is_compressed(iview->format) && + isl_format_is_compressed(image->format->isl_format)) { + /* Scale the ImageView extent by the backing Image. This is used + * internally when an uncompressed ImageView is created on a + * compressed Image. The ImageView can therefore be used for copying + * data from a source Image to a destination Image. 
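+ *
+ * For example (illustrative numbers): a 16x16 BC3 image is stored as 4x4
+ * blocks of 16 bytes each, so an R32G32B32A32_UINT view of it behaves as a
+ * 4x4 surface in which one element corresponds to one compressed block.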
+ */ + const struct isl_format_layout * isl_layout = image->format->isl_layout; + + iview->level_0_extent.depth = anv_minify(image->extent.depth, range->baseMipLevel); + iview->level_0_extent.depth = DIV_ROUND_UP(iview->level_0_extent.depth, isl_layout->bd); + + iview->level_0_extent.height = isl_surf_get_array_pitch_el_rows(&surface->isl) * image->array_size; + iview->level_0_extent.width = isl_surf_get_row_pitch_el(&surface->isl); + mCreateInfo.subresourceRange.baseMipLevel = 0; + mCreateInfo.subresourceRange.baseArrayLayer = 0; + } else { + iview->level_0_extent.width = image->extent.width; + iview->level_0_extent.height = image->extent.height; + iview->level_0_extent.depth = image->extent.depth; + } + + iview->extent = (VkExtent3D) { + .width = anv_minify(iview->level_0_extent.width , range->baseMipLevel), + .height = anv_minify(iview->level_0_extent.height, range->baseMipLevel), + .depth = anv_minify(iview->level_0_extent.depth , range->baseMipLevel), + }; + + if (image->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { + iview->sampler_surface_state = alloc_surface_state(device, cmd_buffer); + + anv_fill_image_surface_state(device, iview->sampler_surface_state, + iview, &mCreateInfo, + VK_IMAGE_USAGE_SAMPLED_BIT); + } else { + iview->sampler_surface_state.alloc_size = 0; + } + + if (image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer); + + anv_fill_image_surface_state(device, iview->color_rt_surface_state, + iview, &mCreateInfo, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + } else { + iview->color_rt_surface_state.alloc_size = 0; + } + + if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) { + iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); + + if (has_matching_storage_typed_format(device, iview->format)) + anv_fill_image_surface_state(device, iview->storage_surface_state, + iview, &mCreateInfo, + VK_IMAGE_USAGE_STORAGE_BIT); + else + anv_fill_buffer_surface_state(device, iview->storage_surface_state, + ISL_FORMAT_RAW, + iview->offset, + iview->bo->size - iview->offset, 1); + + } else { + iview->storage_surface_state.alloc_size = 0; + } +} + +VkResult +anv_CreateImageView(VkDevice _device, + const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImageView *pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_image_view *view; + + view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_image_view_init(view, device, pCreateInfo, NULL, 0); + + *pView = anv_image_view_to_handle(view); + + return VK_SUCCESS; +} + +void +anv_DestroyImageView(VkDevice _device, VkImageView _iview, + const VkAllocationCallbacks *pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_image_view, iview, _iview); + + if (iview->color_rt_surface_state.alloc_size > 0) { + anv_state_pool_free(&device->surface_state_pool, + iview->color_rt_surface_state); + } + + if (iview->sampler_surface_state.alloc_size > 0) { + anv_state_pool_free(&device->surface_state_pool, + iview->sampler_surface_state); + } + + if (iview->storage_surface_state.alloc_size > 0) { + anv_state_pool_free(&device->surface_state_pool, + iview->storage_surface_state); + } + + anv_free2(&device->alloc, pAllocator, iview); +} + +VkResult +anv_CreateBufferView(VkDevice _device, + const VkBufferViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + 
VkBufferView *pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer); + struct anv_buffer_view *view; + + view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!view) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + const struct anv_format *format = + anv_format_for_vk_format(pCreateInfo->format); + + view->format = format->isl_format; + view->bo = buffer->bo; + view->offset = buffer->offset + pCreateInfo->offset; + view->range = pCreateInfo->range == VK_WHOLE_SIZE ? + buffer->size - view->offset : pCreateInfo->range; + + if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT) { + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + + anv_fill_buffer_surface_state(device, view->surface_state, + view->format, + view->offset, view->range, + format->isl_layout->bs); + } else { + view->surface_state = (struct anv_state){ 0 }; + } + + if (buffer->usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) { + view->storage_surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + + enum isl_format storage_format = + has_matching_storage_typed_format(device, view->format) ? + isl_lower_storage_image_format(&device->isl_dev, view->format) : + ISL_FORMAT_RAW; + + anv_fill_buffer_surface_state(device, view->storage_surface_state, + storage_format, + view->offset, view->range, + (storage_format == ISL_FORMAT_RAW ? 1 : + format->isl_layout->bs)); + + } else { + view->storage_surface_state = (struct anv_state){ 0 }; + } + + *pView = anv_buffer_view_to_handle(view); + + return VK_SUCCESS; +} + +void +anv_DestroyBufferView(VkDevice _device, VkBufferView bufferView, + const VkAllocationCallbacks *pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_buffer_view, view, bufferView); + + if (view->surface_state.alloc_size > 0) + anv_state_pool_free(&device->surface_state_pool, + view->surface_state); + + if (view->storage_surface_state.alloc_size > 0) + anv_state_pool_free(&device->surface_state_pool, + view->storage_surface_state); + + anv_free2(&device->alloc, pAllocator, view); +} + +struct anv_surface * +anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlags aspect_mask) +{ + switch (aspect_mask) { + case VK_IMAGE_ASPECT_COLOR_BIT: + /* Dragons will eat you. + * + * Meta attaches all destination surfaces as color render targets. Guess + * what surface the Meta Dragons really want. + */ + if (image->format->has_depth && image->format->has_stencil) { + return &image->depth_surface; + } else if (image->format->has_depth) { + return &image->depth_surface; + } else if (image->format->has_stencil) { + return &image->stencil_surface; + } else { + return &image->color_surface; + } + break; + case VK_IMAGE_ASPECT_DEPTH_BIT: + assert(image->format->has_depth); + return &image->depth_surface; + case VK_IMAGE_ASPECT_STENCIL_BIT: + assert(image->format->has_stencil); + return &image->stencil_surface; + case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: + if (image->format->has_depth && image->format->has_stencil) { + /* FINISHME: The Vulkan spec (git a511ba2) requires support for + * combined depth stencil formats. Specifically, it states: + * + * At least one of ename:VK_FORMAT_D24_UNORM_S8_UINT or + * ename:VK_FORMAT_D32_SFLOAT_S8_UINT must be supported. 
+ * + * Image views with both depth and stencil aspects are only valid for + * render target attachments, in which case + * cmd_buffer_emit_depth_stencil() will pick out both the depth and + * stencil surfaces from the underlying surface. + */ + return &image->depth_surface; + } else if (image->format->has_depth) { + return &image->depth_surface; + } else if (image->format->has_stencil) { + return &image->stencil_surface; + } + /* fallthrough */ + default: + unreachable("image does not have aspect"); + return NULL; + } +} + +static void +image_param_defaults(struct brw_image_param *param) +{ + memset(param, 0, sizeof *param); + /* Set the swizzling shifts to all-ones to effectively disable swizzling -- + * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more + * detailed explanation of these parameters. + */ + param->swizzling[0] = 0xff; + param->swizzling[1] = 0xff; +} + +void +anv_image_view_fill_image_param(struct anv_device *device, + struct anv_image_view *view, + struct brw_image_param *param) +{ + image_param_defaults(param); + + const struct isl_surf *surf = &view->image->color_surface.isl; + const int cpp = isl_format_get_layout(surf->format)->bs; + const struct isl_extent3d image_align_sa = + isl_surf_get_image_alignment_sa(surf); + + param->size[0] = view->extent.width; + param->size[1] = view->extent.height; + if (surf->dim == ISL_SURF_DIM_3D) { + param->size[2] = view->extent.depth; + } else { + param->size[2] = surf->logical_level0_px.array_len - view->base_layer; + } + + isl_surf_get_image_offset_el(surf, view->base_mip, view->base_layer, 0, + ¶m->offset[0], ¶m->offset[1]); + + param->stride[0] = cpp; + param->stride[1] = surf->row_pitch / cpp; + + if (device->info.gen < 9 && surf->dim == ISL_SURF_DIM_3D) { + param->stride[2] = util_align_npot(param->size[0], image_align_sa.w); + param->stride[3] = util_align_npot(param->size[1], image_align_sa.h); + } else { + param->stride[2] = 0; + param->stride[3] = isl_surf_get_array_pitch_el_rows(surf); + } + + switch (surf->tiling) { + case ISL_TILING_LINEAR: + /* image_param_defaults is good enough */ + break; + + case ISL_TILING_X: + /* An X tile is a rectangular block of 512x8 bytes. */ + param->tiling[0] = util_logbase2(512 / cpp); + param->tiling[1] = util_logbase2(8); + + if (device->isl_dev.has_bit6_swizzling) { + /* Right shifts required to swizzle bits 9 and 10 of the memory + * address with bit 6. + */ + param->swizzling[0] = 3; + param->swizzling[1] = 4; + } + break; + + case ISL_TILING_Y0: + /* The layout of a Y-tiled surface in memory isn't really fundamentally + * different to the layout of an X-tiled surface, we simply pretend that + * the surface is broken up in a number of smaller 16Bx32 tiles, each + * one arranged in X-major order just like is the case for X-tiling. + */ + param->tiling[0] = util_logbase2(16 / cpp); + param->tiling[1] = util_logbase2(32); + + if (device->isl_dev.has_bit6_swizzling) { + /* Right shift required to swizzle bit 9 of the memory address with + * bit 6. + */ + param->swizzling[0] = 3; + param->swizzling[1] = 0xff; + } + break; + + default: + assert(!"Unhandled storage image tiling"); + } + + /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The + * address calculation algorithm (emit_address_calculation() in + * brw_fs_surface_builder.cpp) handles this as a sort of tiling with + * modulus equal to the LOD. + */ + param->tiling[2] = (device->info.gen < 9 && surf->dim == ISL_SURF_DIM_3D ? 
+ view->base_mip : 0); +} + +void +anv_buffer_view_fill_image_param(struct anv_device *device, + struct anv_buffer_view *view, + struct brw_image_param *param) +{ + image_param_defaults(param); + + param->stride[0] = isl_format_layouts[view->format].bs; + param->size[0] = view->range / param->stride[0]; +} diff --git a/src/intel/vulkan/anv_intel.c b/src/intel/vulkan/anv_intel.c new file mode 100644 index 0000000..d95d9af --- /dev/null +++ b/src/intel/vulkan/anv_intel.c @@ -0,0 +1,100 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +VkResult anv_CreateDmaBufImageINTEL( + VkDevice _device, + const VkDmaBufImageCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDeviceMemory* pMem, + VkImage* pImage) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_device_memory *mem; + struct anv_image *image; + VkResult result; + VkImage image_h; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL); + + mem = anv_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (mem == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + mem->bo.gem_handle = anv_gem_fd_to_handle(device, pCreateInfo->fd); + if (!mem->bo.gem_handle) { + result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail; + } + + mem->bo.map = NULL; + mem->bo.index = 0; + mem->bo.offset = 0; + mem->bo.size = pCreateInfo->strideInBytes * pCreateInfo->extent.height; + + anv_image_create(_device, + &(struct anv_image_create_info) { + .isl_tiling_flags = ISL_TILING_X_BIT, + .stride = pCreateInfo->strideInBytes, + .vk_info = + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = pCreateInfo->format, + .extent = pCreateInfo->extent, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + /* FIXME: Need a way to use X tiling to allow scanout */ + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }}, + pAllocator, &image_h); + + image = anv_image_from_handle(image_h); + image->bo = &mem->bo; + image->offset = 0; + + assert(image->extent.width > 0); + assert(image->extent.height > 0); + assert(image->extent.depth == 1); + + *pMem = anv_device_memory_to_handle(mem); + *pImage = anv_image_to_handle(image); + + return VK_SUCCESS; + + fail: + 
anv_free2(&device->alloc, pAllocator, mem); + + return result; +} diff --git a/src/intel/vulkan/anv_meta.c b/src/intel/vulkan/anv_meta.c new file mode 100644 index 0000000..82944ea --- /dev/null +++ b/src/intel/vulkan/anv_meta.c @@ -0,0 +1,169 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_meta.h" + +struct anv_render_pass anv_meta_dummy_renderpass = {0}; + +void +anv_meta_save(struct anv_meta_saved_state *state, + const struct anv_cmd_buffer *cmd_buffer, + uint32_t dynamic_mask) +{ + state->old_pipeline = cmd_buffer->state.pipeline; + state->old_descriptor_set0 = cmd_buffer->state.descriptors[0]; + memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings, + sizeof(state->old_vertex_bindings)); + + state->dynamic_mask = dynamic_mask; + anv_dynamic_state_copy(&state->dynamic, &cmd_buffer->state.dynamic, + dynamic_mask); +} + +void +anv_meta_restore(const struct anv_meta_saved_state *state, + struct anv_cmd_buffer *cmd_buffer) +{ + cmd_buffer->state.pipeline = state->old_pipeline; + cmd_buffer->state.descriptors[0] = state->old_descriptor_set0; + memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings, + sizeof(state->old_vertex_bindings)); + + cmd_buffer->state.vb_dirty |= (1 << ANV_META_VERTEX_BINDING_COUNT) - 1; + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE; + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + + anv_dynamic_state_copy(&cmd_buffer->state.dynamic, &state->dynamic, + state->dynamic_mask); + cmd_buffer->state.dirty |= state->dynamic_mask; + + /* Since we've used the pipeline with the VS disabled, set + * need_query_wa. See CmdBeginQuery. + */ + cmd_buffer->state.need_query_wa = true; +} + +VkImageViewType +anv_meta_get_view_type(const struct anv_image *image) +{ + switch (image->type) { + case VK_IMAGE_TYPE_1D: return VK_IMAGE_VIEW_TYPE_1D; + case VK_IMAGE_TYPE_2D: return VK_IMAGE_VIEW_TYPE_2D; + case VK_IMAGE_TYPE_3D: return VK_IMAGE_VIEW_TYPE_3D; + default: + unreachable("bad VkImageViewType"); + } +} + +/** + * When creating a destination VkImageView, this function provides the needed + * VkImageViewCreateInfo::subresourceRange::baseArrayLayer. 
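+ *
+ * For 1D and 2D destinations this is simply the subresource's
+ * baseArrayLayer; for a 3D destination the z component of the destination
+ * offset is reinterpreted as the array layer (so, illustratively, a copy
+ * to z = 5 of a 3D image renders to "layer" 5).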
+ */ +uint32_t +anv_meta_get_iview_layer(const struct anv_image *dest_image, + const VkImageSubresourceLayers *dest_subresource, + const VkOffset3D *dest_offset) +{ + switch (dest_image->type) { + case VK_IMAGE_TYPE_1D: + case VK_IMAGE_TYPE_2D: + return dest_subresource->baseArrayLayer; + case VK_IMAGE_TYPE_3D: + /* HACK: Vulkan does not allow attaching a 3D image to a framebuffer, + * but meta does it anyway. When doing so, we translate the + * destination's z offset into an array offset. + */ + return dest_offset->z; + default: + assert(!"bad VkImageType"); + return 0; + } +} + +static void * +meta_alloc(void* _device, size_t size, size_t alignment, + VkSystemAllocationScope allocationScope) +{ + struct anv_device *device = _device; + return device->alloc.pfnAllocation(device->alloc.pUserData, size, alignment, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); +} + +static void * +meta_realloc(void* _device, void *original, size_t size, size_t alignment, + VkSystemAllocationScope allocationScope) +{ + struct anv_device *device = _device; + return device->alloc.pfnReallocation(device->alloc.pUserData, original, + size, alignment, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); +} + +static void +meta_free(void* _device, void *data) +{ + struct anv_device *device = _device; + return device->alloc.pfnFree(device->alloc.pUserData, data); +} + +VkResult +anv_device_init_meta(struct anv_device *device) +{ + VkResult result; + + device->meta_state.alloc = (VkAllocationCallbacks) { + .pUserData = device, + .pfnAllocation = meta_alloc, + .pfnReallocation = meta_realloc, + .pfnFree = meta_free, + }; + + result = anv_device_init_meta_clear_state(device); + if (result != VK_SUCCESS) + goto fail_clear; + + result = anv_device_init_meta_resolve_state(device); + if (result != VK_SUCCESS) + goto fail_resolve; + + result = anv_device_init_meta_blit_state(device); + if (result != VK_SUCCESS) + goto fail_blit; + + return VK_SUCCESS; + +fail_blit: + anv_device_finish_meta_resolve_state(device); +fail_resolve: + anv_device_finish_meta_clear_state(device); +fail_clear: + return result; +} + +void +anv_device_finish_meta(struct anv_device *device) +{ + anv_device_finish_meta_resolve_state(device); + anv_device_finish_meta_clear_state(device); + anv_device_finish_meta_blit_state(device); +} diff --git a/src/intel/vulkan/anv_meta.h b/src/intel/vulkan/anv_meta.h new file mode 100644 index 0000000..d33e9e6 --- /dev/null +++ b/src/intel/vulkan/anv_meta.h @@ -0,0 +1,75 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include "anv_private.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ANV_META_VERTEX_BINDING_COUNT 2 + +struct anv_meta_saved_state { + struct anv_vertex_binding old_vertex_bindings[ANV_META_VERTEX_BINDING_COUNT]; + struct anv_descriptor_set *old_descriptor_set0; + struct anv_pipeline *old_pipeline; + + /** + * Bitmask of (1 << VK_DYNAMIC_STATE_*). Defines the set of saved dynamic + * state. + */ + uint32_t dynamic_mask; + struct anv_dynamic_state dynamic; +}; + +VkResult anv_device_init_meta_clear_state(struct anv_device *device); +void anv_device_finish_meta_clear_state(struct anv_device *device); + +VkResult anv_device_init_meta_resolve_state(struct anv_device *device); +void anv_device_finish_meta_resolve_state(struct anv_device *device); + +VkResult anv_device_init_meta_blit_state(struct anv_device *device); +void anv_device_finish_meta_blit_state(struct anv_device *device); + +void +anv_meta_save(struct anv_meta_saved_state *state, + const struct anv_cmd_buffer *cmd_buffer, + uint32_t dynamic_mask); + +void +anv_meta_restore(const struct anv_meta_saved_state *state, + struct anv_cmd_buffer *cmd_buffer); + +VkImageViewType +anv_meta_get_view_type(const struct anv_image *image); + +uint32_t +anv_meta_get_iview_layer(const struct anv_image *dest_image, + const VkImageSubresourceLayers *dest_subresource, + const VkOffset3D *dest_offset); + +#ifdef __cplusplus +} +#endif diff --git a/src/intel/vulkan/anv_meta_blit.c b/src/intel/vulkan/anv_meta_blit.c new file mode 100644 index 0000000..07ebcbc --- /dev/null +++ b/src/intel/vulkan/anv_meta_blit.c @@ -0,0 +1,1442 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_meta.h" +#include "nir/nir_builder.h" + +struct blit_region { + VkOffset3D src_offset; + VkExtent3D src_extent; + VkOffset3D dest_offset; + VkExtent3D dest_extent; +}; + +static nir_shader * +build_nir_vertex_shader(void) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs"); + + nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec4, "a_pos"); + pos_in->data.location = VERT_ATTRIB_GENERIC0; + nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "gl_Position"); + pos_out->data.location = VARYING_SLOT_POS; + nir_copy_var(&b, pos_out, pos_in); + + nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec4, "a_tex_pos"); + tex_pos_in->data.location = VERT_ATTRIB_GENERIC1; + nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "v_tex_pos"); + tex_pos_out->data.location = VARYING_SLOT_VAR0; + tex_pos_out->data.interpolation = INTERP_QUALIFIER_SMOOTH; + nir_copy_var(&b, tex_pos_out, tex_pos_in); + + return b.shader; +} + +static nir_shader * +build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs"); + + nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, + vec4, "v_tex_pos"); + tex_pos_in->data.location = VARYING_SLOT_VAR0; + + /* Swizzle the array index which comes in as Z coordinate into the right + * position. + */ + unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 }; + nir_ssa_def *const tex_pos = + nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, + (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false); + + const struct glsl_type *sampler_type = + glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, + glsl_get_base_type(vec4)); + nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, + sampler_type, "s_tex"); + sampler->data.descriptor_set = 0; + sampler->data.binding = 0; + + nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1); + tex->sampler_dim = tex_dim; + tex->op = nir_texop_tex; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(tex_pos); + tex->dest_type = nir_type_float; /* TODO */ + tex->is_array = glsl_sampler_type_is_array(sampler_type); + tex->coord_components = tex_pos->num_components; + tex->texture = nir_deref_var_create(tex, sampler); + tex->sampler = nir_deref_var_create(tex, sampler); + + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex"); + nir_builder_instr_insert(&b, &tex->instr); + + nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, + vec4, "f_color"); + color_out->data.location = FRAG_RESULT_DATA0; + nir_store_var(&b, color_out, &tex->dest.ssa, 4); + + return b.shader; +} + +static void +meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_meta_saved_state *saved_state) +{ + anv_meta_save(saved_state, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT)); +} + +/* Returns the user-provided VkBufferImageCopy::imageOffset in units of + * elements rather than texels. One element equals one texel or one block + * if Image is uncompressed or compressed, respectively. 
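+ *
+ * For example (illustrative): with a BC1 image, whose blocks are 4x4
+ * texels, an imageOffset of (8, 4, 0) texels becomes the element offset
+ * (2, 1, 0), since x and y are divided by the block width and height.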
+ */ +static struct VkOffset3D +meta_region_offset_el(const struct anv_image * image, + const struct VkOffset3D * offset) +{ + const struct isl_format_layout * isl_layout = image->format->isl_layout; + return (VkOffset3D) { + .x = offset->x / isl_layout->bw, + .y = offset->y / isl_layout->bh, + .z = offset->z / isl_layout->bd, + }; +} + +/* Returns the user-provided VkBufferImageCopy::imageExtent in units of + * elements rather than texels. One element equals one texel or one block + * if Image is uncompressed or compressed, respectively. + */ +static struct VkExtent3D +meta_region_extent_el(const VkFormat format, + const struct VkExtent3D * extent) +{ + const struct isl_format_layout * isl_layout = + anv_format_for_vk_format(format)->isl_layout; + return (VkExtent3D) { + .width = DIV_ROUND_UP(extent->width , isl_layout->bw), + .height = DIV_ROUND_UP(extent->height, isl_layout->bh), + .depth = DIV_ROUND_UP(extent->depth , isl_layout->bd), + }; +} + +static void +meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_image *src_image, + struct anv_image_view *src_iview, + VkOffset3D src_offset, + VkExtent3D src_extent, + struct anv_image *dest_image, + struct anv_image_view *dest_iview, + VkOffset3D dest_offset, + VkExtent3D dest_extent, + VkFilter blit_filter) +{ + struct anv_device *device = cmd_buffer->device; + VkDescriptorPool dummy_desc_pool = (VkDescriptorPool)1; + + struct blit_vb_data { + float pos[2]; + float tex_coord[3]; + } *vb_data; + + assert(src_image->samples == dest_image->samples); + + unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); + + struct anv_state vb_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); + memset(vb_state.map, 0, sizeof(struct anv_vue_header)); + vb_data = vb_state.map + sizeof(struct anv_vue_header); + + vb_data[0] = (struct blit_vb_data) { + .pos = { + dest_offset.x + dest_extent.width, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)(src_offset.x + src_extent.width) / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + vb_data[1] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)src_offset.x / (float)src_iview->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + vb_data[2] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + dest_offset.y, + }, + .tex_coord = { + (float)src_offset.x / (float)src_iview->extent.width, + (float)src_offset.y / (float)src_iview->extent.height, + (float)src_offset.z / (float)src_iview->extent.depth, + }, + }; + + anv_state_clflush(vb_state); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = vb_size, + .bo = &device->dynamic_state_block_pool.bo, + .offset = vb_state.offset, + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + sizeof(struct anv_vue_header), + }); + + VkSampler sampler; + ANV_CALL(CreateSampler)(anv_device_to_handle(device), + &(VkSamplerCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = blit_filter, + .minFilter = blit_filter, + }, &cmd_buffer->pool->alloc, &sampler); + + VkDescriptorSet set; + 
anv_AllocateDescriptorSets(anv_device_to_handle(device), + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = dummy_desc_pool, + .descriptorSetCount = 1, + .pSetLayouts = &device->meta_state.blit.ds_layout + }, &set); + anv_UpdateDescriptorSets(anv_device_to_handle(device), + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = sampler, + .imageView = anv_image_view_to_handle(src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + } + } + }, 0, NULL); + + VkFramebuffer fb; + anv_CreateFramebuffer(anv_device_to_handle(device), + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(dest_iview), + }, + .width = dest_iview->extent.width, + .height = dest_iview->extent.height, + .layers = 1 + }, &cmd_buffer->pool->alloc, &fb); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = device->meta_state.blit.render_pass, + .framebuffer = fb, + .renderArea = { + .offset = { dest_offset.x, dest_offset.y }, + .extent = { dest_extent.width, dest_extent.height }, + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, VK_SUBPASS_CONTENTS_INLINE); + + VkPipeline pipeline; + + switch (src_image->type) { + case VK_IMAGE_TYPE_1D: + pipeline = device->meta_state.blit.pipeline_1d_src; + break; + case VK_IMAGE_TYPE_2D: + pipeline = device->meta_state.blit.pipeline_2d_src; + break; + case VK_IMAGE_TYPE_3D: + pipeline = device->meta_state.blit.pipeline_3d_src; + break; + default: + unreachable(!"bad VkImageType"); + } + + if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { + anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + } + + anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, + &(VkViewport) { + .x = 0.0f, + .y = 0.0f, + .width = dest_iview->extent.width, + .height = dest_iview->extent.height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }); + + anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit.pipeline_layout, 0, 1, + &set, 0, NULL); + + ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + + /* At the point where we emit the draw call, all data from the + * descriptor sets, etc. has been used. We are free to delete it. + */ + anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(set)); + anv_DestroySampler(anv_device_to_handle(device), sampler, + &cmd_buffer->pool->alloc); + anv_DestroyFramebuffer(anv_device_to_handle(device), fb, + &cmd_buffer->pool->alloc); +} + +static void +meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, + const struct anv_meta_saved_state *saved_state) +{ + anv_meta_restore(saved_state, cmd_buffer); +} + +static VkFormat +vk_format_for_size(int bs) +{ + /* Note: We intentionally use the 4-channel formats whenever we can. + * This is so that, when we do a RGB <-> RGBX copy, the two formats will + * line up even though one of them is 3/4 the size of the other. 
+ */ + switch (bs) { + case 1: return VK_FORMAT_R8_UINT; + case 2: return VK_FORMAT_R8G8_UINT; + case 3: return VK_FORMAT_R8G8B8_UINT; + case 4: return VK_FORMAT_R8G8B8A8_UINT; + case 6: return VK_FORMAT_R16G16B16_UINT; + case 8: return VK_FORMAT_R16G16B16A16_UINT; + case 12: return VK_FORMAT_R32G32B32_UINT; + case 16: return VK_FORMAT_R32G32B32A32_UINT; + default: + unreachable("Invalid format block size"); + } +} + +static void +do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *src, uint64_t src_offset, + struct anv_bo *dest, uint64_t dest_offset, + int width, int height, VkFormat copy_format) +{ + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + + VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = copy_format, + .extent = { + .width = width, + .height = height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = 0, + .flags = 0, + }; + + VkImage src_image; + image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + anv_CreateImage(vk_device, &image_info, + &cmd_buffer->pool->alloc, &src_image); + + VkImage dest_image; + image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + anv_CreateImage(vk_device, &image_info, + &cmd_buffer->pool->alloc, &dest_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. + */ + anv_image_from_handle(src_image)->bo = src; + anv_image_from_handle(src_image)->offset = src_offset; + anv_image_from_handle(dest_image)->bo = dest; + anv_image_from_handle(dest_image)->offset = dest_offset; + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = src_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = copy_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = dest_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = copy_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + anv_image_from_handle(src_image), + &src_iview, + (VkOffset3D) { 0, 0, 0 }, + (VkExtent3D) { width, height, 1 }, + anv_image_from_handle(dest_image), + &dest_iview, + (VkOffset3D) { 0, 0, 0 }, + (VkExtent3D) { width, height, 1 }, + VK_FILTER_NEAREST); + + anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc); + anv_DestroyImage(vk_device, dest_image, &cmd_buffer->pool->alloc); +} + +void anv_CmdCopyBuffer( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); + ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer); + + struct anv_meta_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; + uint64_t 
dest_offset = dest_buffer->offset + pRegions[r].dstOffset; + uint64_t copy_size = pRegions[r].size; + + /* First, we compute the biggest format that can be used with the + * given offsets and size. + */ + int bs = 16; + + int fs = ffs(src_offset) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(src_offset % bs == 0); + + fs = ffs(dest_offset) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(dest_offset % bs == 0); + + fs = ffs(pRegions[r].size) - 1; + if (fs != -1) + bs = MIN2(bs, 1 << fs); + assert(pRegions[r].size % bs == 0); + + VkFormat copy_format = vk_format_for_size(bs); + + /* This is maximum possible width/height our HW can handle */ + uint64_t max_surface_dim = 1 << 14; + + /* First, we make a bunch of max-sized copies */ + uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs; + while (copy_size >= max_copy_size) { + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + max_surface_dim, max_surface_dim, copy_format); + copy_size -= max_copy_size; + src_offset += max_copy_size; + dest_offset += max_copy_size; + } + + uint64_t height = copy_size / (max_surface_dim * bs); + assert(height < max_surface_dim); + if (height != 0) { + uint64_t rect_copy_size = height * max_surface_dim * bs; + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + max_surface_dim, height, copy_format); + copy_size -= rect_copy_size; + src_offset += rect_copy_size; + dest_offset += rect_copy_size; + } + + if (copy_size != 0) { + do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, + dest_buffer->bo, dest_offset, + copy_size / bs, 1, copy_format); + } + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdUpdateBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize dataSize, + const uint32_t* pData) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); + struct anv_meta_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + /* We can't quite grab a full block because the state stream needs a + * little data at the top to build its linked list. + */ + const uint32_t max_update_size = + cmd_buffer->device->dynamic_state_block_pool.block_size - 64; + + assert(max_update_size < (1 << 14) * 4); + + while (dataSize) { + const uint32_t copy_size = MIN2(dataSize, max_update_size); + + struct anv_state tmp_data = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64); + + memcpy(tmp_data.map, pData, copy_size); + + VkFormat format; + int bs; + if ((copy_size & 15) == 0 && (dstOffset & 15) == 0) { + format = VK_FORMAT_R32G32B32A32_UINT; + bs = 16; + } else if ((copy_size & 7) == 0 && (dstOffset & 7) == 0) { + format = VK_FORMAT_R32G32_UINT; + bs = 8; + } else { + assert((copy_size & 3) == 0 && (dstOffset & 3) == 0); + format = VK_FORMAT_R32_UINT; + bs = 4; + } + + do_buffer_copy(cmd_buffer, + &cmd_buffer->device->dynamic_state_block_pool.bo, + tmp_data.offset, + dst_buffer->bo, dst_buffer->offset + dstOffset, + copy_size / bs, 1, format); + + dataSize -= copy_size; + dstOffset += copy_size; + pData = (void *)pData + copy_size; + } +} + +static VkFormat +choose_iview_format(struct anv_image *image, VkImageAspectFlagBits aspect) +{ + assert(__builtin_popcount(aspect) == 1); + + struct isl_surf *surf = + &anv_image_get_surface_for_aspect_mask(image, aspect)->isl; + + /* vkCmdCopyImage behaves like memcpy. 
Therefore we choose identical UINT + * formats for the source and destination image views. + * + * From the Vulkan spec (2015-12-30): + * + * vkCmdCopyImage performs image copies in a similar manner to a host + * memcpy. It does not perform general-purpose conversions such as + * scaling, resizing, blending, color-space conversion, or format + * conversions. Rather, it simply copies raw image data. vkCmdCopyImage + * can copy between images with different formats, provided the formats + * are compatible as defined below. + * + * [The spec later defines compatibility as having the same number of + * bytes per block]. + */ + return vk_format_for_size(isl_format_layouts[surf->format].bs); +} + +static VkFormat +choose_buffer_format(VkFormat format, VkImageAspectFlagBits aspect) +{ + assert(__builtin_popcount(aspect) == 1); + + /* vkCmdCopy* commands behave like memcpy. Therefore we choose + * compatable UINT formats for the source and destination image views. + * + * For the buffer, we go back to the original image format and get a + * the format as if it were linear. This way, for RGB formats, we get + * an RGB format here even if the tiled image is RGBA. XXX: This doesn't + * work if the buffer is the destination. + */ + enum isl_format linear_format = anv_get_isl_format(format, aspect, + VK_IMAGE_TILING_LINEAR, + NULL); + + return vk_format_for_size(isl_format_layouts[linear_format].bs); +} + +void anv_CmdCopyImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + struct anv_meta_saved_state saved_state; + + /* From the Vulkan 1.0 spec: + * + * vkCmdCopyImage can be used to copy image data between multisample + * images, but both images must have the same number of samples. 
+ */ + assert(src_image->samples == dest_image->samples); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + assert(pRegions[r].srcSubresource.aspectMask == + pRegions[r].dstSubresource.aspectMask); + + VkImageAspectFlags aspect = pRegions[r].srcSubresource.aspectMask; + + VkFormat src_format = choose_iview_format(src_image, aspect); + VkFormat dst_format = choose_iview_format(dest_image, aspect); + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = anv_meta_get_view_type(src_image), + .format = src_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, + .layerCount = pRegions[r].dstSubresource.layerCount, + }, + }, + cmd_buffer, 0); + + const VkOffset3D dest_offset = { + .x = pRegions[r].dstOffset.x, + .y = pRegions[r].dstOffset.y, + .z = 0, + }; + + unsigned num_slices; + if (src_image->type == VK_IMAGE_TYPE_3D) { + assert(pRegions[r].srcSubresource.layerCount == 1 && + pRegions[r].dstSubresource.layerCount == 1); + num_slices = pRegions[r].extent.depth; + } else { + assert(pRegions[r].srcSubresource.layerCount == + pRegions[r].dstSubresource.layerCount); + assert(pRegions[r].extent.depth == 1); + num_slices = pRegions[r].dstSubresource.layerCount; + } + + const uint32_t dest_base_array_slice = + anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, + &pRegions[r].dstOffset); + + for (unsigned slice = 0; slice < num_slices; slice++) { + VkOffset3D src_offset = pRegions[r].srcOffset; + src_offset.z += slice; + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = destImage, + .viewType = anv_meta_get_view_type(dest_image), + .format = dst_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].dstSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_base_array_slice + slice, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + src_image, &src_iview, + src_offset, + pRegions[r].extent, + dest_image, &dest_iview, + dest_offset, + pRegions[r].extent, + VK_FILTER_NEAREST); + } + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdBlitImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageBlit* pRegions, + VkFilter filter) + +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + struct anv_meta_saved_state saved_state; + + /* From the Vulkan 1.0 spec: + * + * vkCmdBlitImage must not be used for multisampled source or + * destination images. Use vkCmdResolveImage for this purpose. 
+ */ + assert(src_image->samples == 1); + assert(dest_image->samples == 1); + + anv_finishme("respect VkFilter"); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = anv_meta_get_view_type(src_image), + .format = src_image->vk_format, + .subresourceRange = { + .aspectMask = pRegions[r].srcSubresource.aspectMask, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + const VkOffset3D dest_offset = { + .x = pRegions[r].dstOffsets[0].x, + .y = pRegions[r].dstOffsets[0].y, + .z = 0, + }; + + if (pRegions[r].dstOffsets[1].x < pRegions[r].dstOffsets[0].x || + pRegions[r].dstOffsets[1].y < pRegions[r].dstOffsets[0].y || + pRegions[r].srcOffsets[1].x < pRegions[r].srcOffsets[0].x || + pRegions[r].srcOffsets[1].y < pRegions[r].srcOffsets[0].y) + anv_finishme("FINISHME: Allow flipping in blits"); + + const VkExtent3D dest_extent = { + .width = pRegions[r].dstOffsets[1].x - pRegions[r].dstOffsets[0].x, + .height = pRegions[r].dstOffsets[1].y - pRegions[r].dstOffsets[0].y, + }; + + const VkExtent3D src_extent = { + .width = pRegions[r].srcOffsets[1].x - pRegions[r].srcOffsets[0].x, + .height = pRegions[r].srcOffsets[1].y - pRegions[r].srcOffsets[0].y, + }; + + const uint32_t dest_array_slice = + anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, + &pRegions[r].dstOffsets[0]); + + if (pRegions[r].srcSubresource.layerCount > 1) + anv_finishme("FINISHME: copy multiple array layers"); + + if (pRegions[r].srcOffsets[0].z + 1 != pRegions[r].srcOffsets[1].z || + pRegions[r].dstOffsets[0].z + 1 != pRegions[r].dstOffsets[1].z) + anv_finishme("FINISHME: copy multiple depth layers"); + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = destImage, + .viewType = anv_meta_get_view_type(dest_image), + .format = dest_image->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].dstSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_array_slice, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + src_image, &src_iview, + pRegions[r].srcOffsets[0], src_extent, + dest_image, &dest_iview, + dest_offset, dest_extent, + filter); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +static struct anv_image * +make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, + VkImageUsageFlags usage, + VkImageType image_type, + const VkAllocationCallbacks *alloc, + const VkBufferImageCopy *copy) +{ + ANV_FROM_HANDLE(anv_buffer, buffer, vk_buffer); + + VkExtent3D extent = copy->imageExtent; + if (copy->bufferRowLength) + extent.width = copy->bufferRowLength; + if (copy->bufferImageHeight) + extent.height = copy->bufferImageHeight; + extent.depth = 1; + extent = meta_region_extent_el(format, &extent); + + VkImageAspectFlags aspect = copy->imageSubresource.aspectMask; + VkFormat buffer_format = choose_buffer_format(format, aspect); + + VkImage vk_image; + VkResult result = anv_CreateImage(vk_device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = 
VK_IMAGE_TYPE_2D, + .format = buffer_format, + .extent = extent, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = usage, + .flags = 0, + }, alloc, &vk_image); + assert(result == VK_SUCCESS); + + ANV_FROM_HANDLE(anv_image, image, vk_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. + */ + image->bo = buffer->bo; + image->offset = buffer->offset + copy->bufferOffset; + + return image; +} + +void anv_CmdCopyBufferToImage( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + struct anv_meta_saved_state saved_state; + + /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to + * VK_SAMPLE_COUNT_1_BIT." + */ + assert(dest_image->samples == 1); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; + + VkFormat image_format = choose_iview_format(dest_image, aspect); + + struct anv_image *src_image = + make_image_for_buffer(vk_device, srcBuffer, dest_image->vk_format, + VK_IMAGE_USAGE_SAMPLED_BIT, + dest_image->type, &cmd_buffer->pool->alloc, + &pRegions[r]); + + const uint32_t dest_base_array_slice = + anv_meta_get_iview_layer(dest_image, &pRegions[r].imageSubresource, + &pRegions[r].imageOffset); + + unsigned num_slices_3d = pRegions[r].imageExtent.depth; + unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; + unsigned slice_3d = 0; + unsigned slice_array = 0; + while (slice_3d < num_slices_3d && slice_array < num_slices_array) { + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(src_image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = src_image->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }, + cmd_buffer, 0); + + uint32_t img_x = 0; + uint32_t img_y = 0; + uint32_t img_o = 0; + if (isl_format_is_compressed(dest_image->format->isl_format)) + isl_surf_get_image_intratile_offset_el(&cmd_buffer->device->isl_dev, + &dest_image->color_surface.isl, + pRegions[r].imageSubresource.mipLevel, + pRegions[r].imageSubresource.baseArrayLayer + slice_array, + pRegions[r].imageOffset.z + slice_3d, + &img_o, &img_x, &img_y); + + VkOffset3D dest_offset_el = meta_region_offset_el(dest_image, & pRegions[r].imageOffset); + dest_offset_el.x += img_x; + dest_offset_el.y += img_y; + dest_offset_el.z = 0; + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(dest_image), + .viewType = anv_meta_get_view_type(dest_image), + .format = image_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_base_array_slice + + slice_array + slice_3d, + .layerCount = 1 + }, + }, + cmd_buffer, img_o); + + const 
VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format, + &pRegions[r].imageExtent); + + meta_emit_blit(cmd_buffer, + src_image, + &src_iview, + (VkOffset3D){0, 0, 0}, + img_extent_el, + dest_image, + &dest_iview, + dest_offset_el, + img_extent_el, + VK_FILTER_NEAREST); + + /* Once we've done the blit, all of the actual information about + * the image is embedded in the command buffer so we can just + * increment the offset directly in the image effectively + * re-binding it to different backing memory. + */ + src_image->offset += src_image->extent.width * + src_image->extent.height * + src_image->format->isl_layout->bs; + + if (dest_image->type == VK_IMAGE_TYPE_3D) + slice_3d++; + else + slice_array++; + } + + anv_DestroyImage(vk_device, anv_image_to_handle(src_image), + &cmd_buffer->pool->alloc); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdCopyImageToBuffer( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + struct anv_meta_saved_state saved_state; + + + /* The Vulkan 1.0 spec says "srcImage must have a sample count equal to + * VK_SAMPLE_COUNT_1_BIT." + */ + assert(src_image->samples == 1); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; + + VkFormat image_format = choose_iview_format(src_image, aspect); + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = anv_meta_get_view_type(src_image), + .format = image_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = pRegions[r].imageSubresource.baseArrayLayer, + .layerCount = pRegions[r].imageSubresource.layerCount, + }, + }, + cmd_buffer, 0); + + struct anv_image *dest_image = + make_image_for_buffer(vk_device, destBuffer, src_image->vk_format, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + src_image->type, &cmd_buffer->pool->alloc, + &pRegions[r]); + + unsigned num_slices; + if (src_image->type == VK_IMAGE_TYPE_3D) { + assert(pRegions[r].imageSubresource.layerCount == 1); + num_slices = pRegions[r].imageExtent.depth; + } else { + assert(pRegions[r].imageExtent.depth == 1); + num_slices = pRegions[r].imageSubresource.layerCount; + } + + for (unsigned slice = 0; slice < num_slices; slice++) { + VkOffset3D src_offset = pRegions[r].imageOffset; + src_offset.z += slice; + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(dest_image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = dest_image->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + meta_emit_blit(cmd_buffer, + anv_image_from_handle(srcImage), + &src_iview, + src_offset, + pRegions[r].imageExtent, + dest_image, + &dest_iview, + (VkOffset3D) { 0, 0, 0 }, + pRegions[r].imageExtent, 
+ VK_FILTER_NEAREST); + + /* Once we've done the blit, all of the actual information about + * the image is embedded in the command buffer so we can just + * increment the offset directly in the image effectively + * re-binding it to different backing memory. + */ + dest_image->offset += dest_image->extent.width * + dest_image->extent.height * + src_image->format->isl_layout->bs; + } + + anv_DestroyImage(vk_device, anv_image_to_handle(dest_image), + &cmd_buffer->pool->alloc); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void +anv_device_finish_meta_blit_state(struct anv_device *device) +{ + anv_DestroyRenderPass(anv_device_to_handle(device), + device->meta_state.blit.render_pass, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_1d_src, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_2d_src, + &device->meta_state.alloc); + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_3d_src, + &device->meta_state.alloc); + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit.pipeline_layout, + &device->meta_state.alloc); + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit.ds_layout, + &device->meta_state.alloc); +} + +VkResult +anv_device_init_meta_blit_state(struct anv_device *device) +{ + VkResult result; + + result = anv_CreateRenderPass(anv_device_to_handle(device), + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 1, + .pColorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = &(VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .preserveAttachmentCount = 1, + .pPreserveAttachments = (uint32_t[]) { 0 }, + }, + .dependencyCount = 0, + }, &device->meta_state.alloc, &device->meta_state.blit.render_pass); + if (result != VK_SUCCESS) + goto fail; + + /* We don't use a vertex shader for blitting, but instead build and pass + * the VUEs directly to the rasterization backend. However, we do need + * to provide GLSL source for the vertex shader so that the compiler + * does not dead-code our inputs. 
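    *
    * Roughly speaking (a sketch only, not the actual
    * build_nir_vertex_shader() body), the pass-through shader just
    * forwards its inputs:
    *
    *    nir_builder b;
    *    nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
    *    nir_variable *pos_in  = nir_variable_create(b.shader, nir_var_shader_in,
    *                                                glsl_vec4_type(), "a_pos");
    *    pos_in->data.location = VERT_ATTRIB_GENERIC0;
    *    nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
    *                                                glsl_vec4_type(), "gl_Position");
    *    pos_out->data.location = VARYING_SLOT_POS;
    *    nir_copy_var(&b, pos_out, pos_in);
    *
    * The real inputs (VUE header, position, texture coordinate) are then
    * described by the vertex input state below.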
+ */ + struct anv_shader_module vs = { + .nir = build_nir_vertex_shader(), + }; + + struct anv_shader_module fs_1d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D), + }; + + struct anv_shader_module fs_2d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D), + }; + + struct anv_shader_module fs_3d = { + .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D), + }; + + VkPipelineVertexInputStateCreateInfo vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = 0, + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + { + .binding = 1, + .stride = 5 * sizeof(float), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = 0 + }, + { + /* Position */ + .location = 1, + .binding = 1, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = 0 + }, + { + /* Texture Coordinate */ + .location = 2, + .binding = 1, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offset = 8 + } + } + }; + + VkDescriptorSetLayoutCreateInfo ds_layout_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + }, + } + }; + result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), + &ds_layout_info, + &device->meta_state.alloc, + &device->meta_state.blit.ds_layout); + if (result != VK_SUCCESS) + goto fail_render_pass; + + result = anv_CreatePipelineLayout(anv_device_to_handle(device), + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &device->meta_state.blit.ds_layout, + }, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout); + if (result != VK_SUCCESS) + goto fail_descriptor_set_layout; + + VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = anv_shader_module_to_handle(&vs), + .pName = "main", + .pSpecializationInfo = NULL + }, { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! 
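          * Each pipeline created below patches this field in right before
          * the create call, e.g. (taken from the calls further down):
          *
          *    pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d);
          *    result = anv_graphics_pipeline_create(...);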
*/ + .pName = "main", + .pSpecializationInfo = NULL + }, + }; + + const VkGraphicsPipelineCreateInfo vk_pipeline_info = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = ARRAY_SIZE(pipeline_shader_stages), + .pStages = pipeline_shader_stages, + .pVertexInputState = &vi_create_info, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, + .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE + }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = 1, + .sampleShadingEnable = false, + .pSampleMask = (VkSampleMask[]) { UINT32_MAX }, + }, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { .colorWriteMask = + VK_COLOR_COMPONENT_A_BIT | + VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT }, + } + }, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 9, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_LINE_WIDTH, + VK_DYNAMIC_STATE_DEPTH_BIAS, + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }, + }, + .flags = 0, + .layout = device->meta_state.blit.pipeline_layout, + .renderPass = device->meta_state.blit.render_pass, + .subpass = 0, + }; + + const struct anv_graphics_pipeline_create_info anv_pipeline_info = { + .color_attachment_count = -1, + .use_repclear = false, + .disable_viewport = true, + .disable_scissor = true, + .disable_vs = true, + .use_rectlist = true + }; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_1d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_layout; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_2d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_1d; + + pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d); + result = anv_graphics_pipeline_create(anv_device_to_handle(device), + VK_NULL_HANDLE, + &vk_pipeline_info, &anv_pipeline_info, + &device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src); + if (result != VK_SUCCESS) + goto fail_pipeline_2d; + + ralloc_free(vs.nir); + 
ralloc_free(fs_1d.nir); + ralloc_free(fs_2d.nir); + ralloc_free(fs_3d.nir); + + return VK_SUCCESS; + + fail_pipeline_2d: + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_2d_src, + &device->meta_state.alloc); + + fail_pipeline_1d: + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline_1d_src, + &device->meta_state.alloc); + + fail_pipeline_layout: + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit.pipeline_layout, + &device->meta_state.alloc); + fail_descriptor_set_layout: + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit.ds_layout, + &device->meta_state.alloc); + fail_render_pass: + anv_DestroyRenderPass(anv_device_to_handle(device), + device->meta_state.blit.render_pass, + &device->meta_state.alloc); + + ralloc_free(vs.nir); + ralloc_free(fs_1d.nir); + ralloc_free(fs_2d.nir); + ralloc_free(fs_3d.nir); + fail: + return result; +} diff --git a/src/intel/vulkan/anv_meta_clear.c b/src/intel/vulkan/anv_meta_clear.c new file mode 100644 index 0000000..739ae09 --- /dev/null +++ b/src/intel/vulkan/anv_meta_clear.c @@ -0,0 +1,1098 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_meta.h" +#include "anv_private.h" +#include "nir/nir_builder.h" + +/** Vertex attributes for color clears. */ +struct color_clear_vattrs { + struct anv_vue_header vue_header; + float position[2]; /**< 3DPRIM_RECTLIST */ + VkClearColorValue color; +}; + +/** Vertex attributes for depthstencil clears. 
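 *
 * Unlike color_clear_vattrs there is no per-vertex clear value: the depth
 * value is supplied through the viewport's min/max depth and the stencil
 * value through the dynamic stencil reference, as in
 * emit_depthstencil_clear() below:
 *
 *    .minDepth = clear_value.depth,
 *    .maxDepth = clear_value.depth,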
*/ +struct depthstencil_clear_vattrs { + struct anv_vue_header vue_header; + float position[2]; /*<< 3DPRIM_RECTLIST */ +}; + +static void +meta_clear_begin(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_meta_save(saved_state, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT) | + (1 << VK_DYNAMIC_STATE_SCISSOR) | + (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)); + + cmd_buffer->state.dynamic.viewport.count = 0; + cmd_buffer->state.dynamic.scissor.count = 0; +} + +static void +meta_clear_end(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_meta_restore(saved_state, cmd_buffer); +} + +static void +build_color_shaders(struct nir_shader **out_vs, + struct nir_shader **out_fs, + uint32_t frag_output) +{ + nir_builder vs_b; + nir_builder fs_b; + + nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL); + nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL); + + vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs"); + fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs"); + + const struct glsl_type *position_type = glsl_vec4_type(); + const struct glsl_type *color_type = glsl_vec4_type(); + + nir_variable *vs_in_pos = + nir_variable_create(vs_b.shader, nir_var_shader_in, position_type, + "a_position"); + vs_in_pos->data.location = VERT_ATTRIB_GENERIC0; + + nir_variable *vs_out_pos = + nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, + "gl_Position"); + vs_out_pos->data.location = VARYING_SLOT_POS; + + nir_variable *vs_in_color = + nir_variable_create(vs_b.shader, nir_var_shader_in, color_type, + "a_color"); + vs_in_color->data.location = VERT_ATTRIB_GENERIC1; + + nir_variable *vs_out_color = + nir_variable_create(vs_b.shader, nir_var_shader_out, color_type, + "v_color"); + vs_out_color->data.location = VARYING_SLOT_VAR0; + vs_out_color->data.interpolation = INTERP_QUALIFIER_FLAT; + + nir_variable *fs_in_color = + nir_variable_create(fs_b.shader, nir_var_shader_in, color_type, + "v_color"); + fs_in_color->data.location = vs_out_color->data.location; + fs_in_color->data.interpolation = vs_out_color->data.interpolation; + + nir_variable *fs_out_color = + nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, + "f_color"); + fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output; + + nir_copy_var(&vs_b, vs_out_pos, vs_in_pos); + nir_copy_var(&vs_b, vs_out_color, vs_in_color); + nir_copy_var(&fs_b, fs_out_color, fs_in_color); + + *out_vs = vs_b.shader; + *out_fs = fs_b.shader; +} + +static VkResult +create_pipeline(struct anv_device *device, + uint32_t samples, + struct nir_shader *vs_nir, + struct nir_shader *fs_nir, + const VkPipelineVertexInputStateCreateInfo *vi_state, + const VkPipelineDepthStencilStateCreateInfo *ds_state, + const VkPipelineColorBlendStateCreateInfo *cb_state, + const VkAllocationCallbacks *alloc, + bool use_repclear, + struct anv_pipeline **pipeline) +{ + VkDevice device_h = anv_device_to_handle(device); + VkResult result; + + struct anv_shader_module vs_m = { .nir = vs_nir }; + struct anv_shader_module fs_m = { .nir = fs_nir }; + + VkPipeline pipeline_h = VK_NULL_HANDLE; + result = anv_graphics_pipeline_create(device_h, + VK_NULL_HANDLE, + &(VkGraphicsPipelineCreateInfo) { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = fs_nir ? 
2 : 1, + .pStages = (VkPipelineShaderStageCreateInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = anv_shader_module_to_handle(&vs_m), + .pName = "main", + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = anv_shader_module_to_handle(&fs_m), + .pName = "main", + }, + }, + .pVertexInputState = vi_state, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .pViewports = NULL, /* dynamic */ + .scissorCount = 1, + .pScissors = NULL, /* dynamic */ + }, + .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, + .depthBiasEnable = false, + }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = samples, + .sampleShadingEnable = false, + .pSampleMask = (VkSampleMask[]) { ~0 }, + .alphaToCoverageEnable = false, + .alphaToOneEnable = false, + }, + .pDepthStencilState = ds_state, + .pColorBlendState = cb_state, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + /* The meta clear pipeline declares all state as dynamic. + * As a consequence, vkCmdBindPipeline writes no dynamic state + * to the cmd buffer. Therefore, at the end of the meta clear, + * we need only restore dynamic state was vkCmdSet. 
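          *
          * Concretely, meta_clear_begin() above only saves (and so only
          * restores) the three pieces of dynamic state the clear itself
          * touches:
          *
          *    anv_meta_save(saved_state, cmd_buffer,
          *                  (1 << VK_DYNAMIC_STATE_VIEWPORT) |
          *                  (1 << VK_DYNAMIC_STATE_SCISSOR) |
          *                  (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE));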
+ */ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 9, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_LINE_WIDTH, + VK_DYNAMIC_STATE_DEPTH_BIAS, + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }, + }, + .flags = 0, + .renderPass = anv_render_pass_to_handle(&anv_meta_dummy_renderpass), + .subpass = 0, + }, + &(struct anv_graphics_pipeline_create_info) { + .color_attachment_count = MAX_RTS, + .use_repclear = use_repclear, + .disable_viewport = true, + .disable_vs = true, + .use_rectlist = true + }, + alloc, + &pipeline_h); + + ralloc_free(vs_nir); + ralloc_free(fs_nir); + + *pipeline = anv_pipeline_from_handle(pipeline_h); + + return result; +} + +static VkResult +create_color_pipeline(struct anv_device *device, + uint32_t samples, + uint32_t frag_output, + struct anv_pipeline **pipeline) +{ + struct nir_shader *vs_nir; + struct nir_shader *fs_nir; + build_color_shaders(&vs_nir, &fs_nir, frag_output); + + const VkPipelineVertexInputStateCreateInfo vi_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = sizeof(struct color_clear_vattrs), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = offsetof(struct color_clear_vattrs, vue_header), + }, + { + /* Position */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct color_clear_vattrs, position), + }, + { + /* Color */ + .location = 2, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offset = offsetof(struct color_clear_vattrs, color), + }, + }, + }; + + const VkPipelineDepthStencilStateCreateInfo ds_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = false, + .depthWriteEnable = false, + .depthBoundsTestEnable = false, + .stencilTestEnable = false, + }; + + VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = { 0 }; + blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState) { + .blendEnable = false, + .colorWriteMask = VK_COLOR_COMPONENT_A_BIT | + VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT, + }; + + const VkPipelineColorBlendStateCreateInfo cb_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = false, + .attachmentCount = MAX_RTS, + .pAttachments = blend_attachment_state + }; + + /* Disable repclear because we do not want the compiler to replace the + * shader. We need the shader to write to the specified color attachment, + * but the repclear shader writes to all color attachments. 
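    *
    * For contrast, the depth/stencil clear pipelines below have no
    * fragment shader at all, so there is nothing for repclear to clobber
    * and create_depthstencil_pipeline() passes use_repclear = true:
    *
    *    create_pipeline(device, samples, vs_nir, NULL, &vi_state, &ds_state,
    *                    &cb_state, &device->meta_state.alloc,
    *                    true, pipeline);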
+ */ + return + create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, + &cb_state, &device->meta_state.alloc, + /*use_repclear*/ false, pipeline); +} + +static void +destroy_pipeline(struct anv_device *device, struct anv_pipeline *pipeline) +{ + if (!pipeline) + return; + + ANV_CALL(DestroyPipeline)(anv_device_to_handle(device), + anv_pipeline_to_handle(pipeline), + &device->meta_state.alloc); +} + +void +anv_device_finish_meta_clear_state(struct anv_device *device) +{ + struct anv_meta_state *state = &device->meta_state; + + for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) { + for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) { + destroy_pipeline(device, state->clear[i].color_pipelines[j]); + } + + destroy_pipeline(device, state->clear[i].depth_only_pipeline); + destroy_pipeline(device, state->clear[i].stencil_only_pipeline); + destroy_pipeline(device, state->clear[i].depthstencil_pipeline); + } +} + +static void +emit_color_clear(struct anv_cmd_buffer *cmd_buffer, + const VkClearAttachment *clear_att, + const VkClearRect *clear_rect) +{ + struct anv_device *device = cmd_buffer->device; + const struct anv_subpass *subpass = cmd_buffer->state.subpass; + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const uint32_t subpass_att = clear_att->colorAttachment; + const uint32_t pass_att = subpass->color_attachments[subpass_att]; + const struct anv_image_view *iview = fb->attachments[pass_att]; + const uint32_t samples = iview->image->samples; + const uint32_t samples_log2 = ffs(samples) - 1; + struct anv_pipeline *pipeline = + device->meta_state.clear[samples_log2].color_pipelines[subpass_att]; + VkClearColorValue clear_value = clear_att->clearValue.color; + + VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + VkPipeline pipeline_h = anv_pipeline_to_handle(pipeline); + + assert(samples_log2 < ARRAY_SIZE(device->meta_state.clear)); + assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); + assert(clear_att->colorAttachment < subpass->color_count); + + const struct color_clear_vattrs vertex_data[3] = { + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x, + clear_rect->rect.offset.y, + }, + .color = clear_value, + }, + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y, + }, + .color = clear_value, + }, + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y + clear_rect->rect.extent.height, + }, + .color = clear_value, + }, + }; + + struct anv_state state = + anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = sizeof(vertex_data), + .bo = &device->dynamic_state_block_pool.bo, + .offset = state.offset, + }; + + ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1, + (VkViewport[]) { + { + .x = 0, + .y = 0, + .width = fb->width, + .height = fb->height, + .minDepth = 0.0, + .maxDepth = 1.0, + }, + }); + + ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1, + (VkRect2D[]) { + { + .offset = { 0, 0 }, + .extent = { fb->width, fb->height }, + } + }); + + ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1, + (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) }, + (VkDeviceSize[]) { 0 }); + + if (cmd_buffer->state.pipeline != pipeline) { + ANV_CALL(CmdBindPipeline)(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_h); + } + + 
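   /* The draw below emits just the three corners filled in above; with the
    * rectlist topology used by the meta pipelines (use_rectlist = true) the
    * hardware infers the fourth corner. For a hypothetical clear rect at
    * offset (8, 16) with extent 32x24 the emitted positions would be:
    *
    *    (8, 16), (40, 16), (40, 40)
    */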
ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); +} + + +static void +build_depthstencil_shader(struct nir_shader **out_vs) +{ + nir_builder vs_b; + + nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL); + + vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs"); + + const struct glsl_type *position_type = glsl_vec4_type(); + + nir_variable *vs_in_pos = + nir_variable_create(vs_b.shader, nir_var_shader_in, position_type, + "a_position"); + vs_in_pos->data.location = VERT_ATTRIB_GENERIC0; + + nir_variable *vs_out_pos = + nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, + "gl_Position"); + vs_out_pos->data.location = VARYING_SLOT_POS; + + nir_copy_var(&vs_b, vs_out_pos, vs_in_pos); + + *out_vs = vs_b.shader; +} + +static VkResult +create_depthstencil_pipeline(struct anv_device *device, + VkImageAspectFlags aspects, + uint32_t samples, + struct anv_pipeline **pipeline) +{ + struct nir_shader *vs_nir; + + build_depthstencil_shader(&vs_nir); + + const VkPipelineVertexInputStateCreateInfo vi_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = sizeof(struct depthstencil_clear_vattrs), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 2, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = offsetof(struct depthstencil_clear_vattrs, vue_header), + }, + { + /* Position */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct depthstencil_clear_vattrs, position), + }, + }, + }; + + const VkPipelineDepthStencilStateCreateInfo ds_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT), + .depthCompareOp = VK_COMPARE_OP_ALWAYS, + .depthWriteEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT), + .depthBoundsTestEnable = false, + .stencilTestEnable = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT), + .front = { + .passOp = VK_STENCIL_OP_REPLACE, + .compareOp = VK_COMPARE_OP_ALWAYS, + .writeMask = UINT32_MAX, + .reference = 0, /* dynamic */ + }, + .back = { 0 /* dont care */ }, + }; + + const VkPipelineColorBlendStateCreateInfo cb_state = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = false, + .attachmentCount = 0, + .pAttachments = NULL, + }; + + return create_pipeline(device, samples, vs_nir, NULL, &vi_state, &ds_state, + &cb_state, &device->meta_state.alloc, + /*use_repclear*/ true, pipeline); +} + +static void +emit_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, + const VkClearAttachment *clear_att, + const VkClearRect *clear_rect) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_meta_state *meta_state = &device->meta_state; + const struct anv_subpass *subpass = cmd_buffer->state.subpass; + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const uint32_t pass_att = subpass->depth_stencil_attachment; + const struct anv_image_view *iview = fb->attachments[pass_att]; + const uint32_t samples = iview->image->samples; + const uint32_t samples_log2 = ffs(samples) - 1; + VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil; + VkImageAspectFlags aspects = clear_att->aspectMask; + + VkCommandBuffer 
cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + + assert(samples_log2 < ARRAY_SIZE(meta_state->clear)); + assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT || + aspects == VK_IMAGE_ASPECT_STENCIL_BIT || + aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + assert(pass_att != VK_ATTACHMENT_UNUSED); + + const struct depthstencil_clear_vattrs vertex_data[3] = { + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x, + clear_rect->rect.offset.y, + }, + }, + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y, + }, + }, + { + .vue_header = { 0 }, + .position = { + clear_rect->rect.offset.x + clear_rect->rect.extent.width, + clear_rect->rect.offset.y + clear_rect->rect.extent.height, + }, + }, + }; + + struct anv_state state = + anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = sizeof(vertex_data), + .bo = &device->dynamic_state_block_pool.bo, + .offset = state.offset, + }; + + ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1, + (VkViewport[]) { + { + .x = 0, + .y = 0, + .width = fb->width, + .height = fb->height, + + /* Ignored when clearing only stencil. */ + .minDepth = clear_value.depth, + .maxDepth = clear_value.depth, + }, + }); + + ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1, + (VkRect2D[]) { + { + .offset = { 0, 0 }, + .extent = { fb->width, fb->height }, + } + }); + + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + ANV_CALL(CmdSetStencilReference)(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, + clear_value.stencil); + } + + ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1, + (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) }, + (VkDeviceSize[]) { 0 }); + + struct anv_pipeline *pipeline; + switch (aspects) { + case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: + pipeline = meta_state->clear[samples_log2].depthstencil_pipeline; + break; + case VK_IMAGE_ASPECT_DEPTH_BIT: + pipeline = meta_state->clear[samples_log2].depth_only_pipeline; + break; + case VK_IMAGE_ASPECT_STENCIL_BIT: + pipeline = meta_state->clear[samples_log2].stencil_only_pipeline; + break; + default: + unreachable("expected depth or stencil aspect"); + } + + if (cmd_buffer->state.pipeline != pipeline) { + ANV_CALL(CmdBindPipeline)(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + anv_pipeline_to_handle(pipeline)); + } + + ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); +} + +VkResult +anv_device_init_meta_clear_state(struct anv_device *device) +{ + VkResult res; + struct anv_meta_state *state = &device->meta_state; + + zero(device->meta_state.clear); + + for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) { + uint32_t samples = 1 << i; + + for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) { + res = create_color_pipeline(device, samples, /* frag_output */ j, + &state->clear[i].color_pipelines[j]); + if (res != VK_SUCCESS) + goto fail; + } + + res = create_depthstencil_pipeline(device, + VK_IMAGE_ASPECT_DEPTH_BIT, samples, + &state->clear[i].depth_only_pipeline); + if (res != VK_SUCCESS) + goto fail; + + res = create_depthstencil_pipeline(device, + VK_IMAGE_ASPECT_STENCIL_BIT, samples, + &state->clear[i].stencil_only_pipeline); + if (res != VK_SUCCESS) + goto fail; + + res = create_depthstencil_pipeline(device, + VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT, samples, + &state->clear[i].depthstencil_pipeline); + if (res != VK_SUCCESS) + goto fail; + } + + return VK_SUCCESS; 
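   /* The loop above builds one set of clear pipelines per sample count
    * (samples = 1 << i); emit_color_clear() and emit_depthstencil_clear()
    * index back into it with samples_log2 = ffs(samples) - 1, so, for
    * example, an 8-sample attachment uses state->clear[3].
    */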
+ +fail: + anv_device_finish_meta_clear_state(device); + return res; +} + +/** + * The parameters mean that same as those in vkCmdClearAttachments. + */ +static void +emit_clear(struct anv_cmd_buffer *cmd_buffer, + const VkClearAttachment *clear_att, + const VkClearRect *clear_rect) +{ + if (clear_att->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { + emit_color_clear(cmd_buffer, clear_att, clear_rect); + } else { + assert(clear_att->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect); + } +} + +static bool +subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer) +{ + const struct anv_cmd_state *cmd_state = &cmd_buffer->state; + uint32_t ds = cmd_state->subpass->depth_stencil_attachment; + + for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { + uint32_t a = cmd_state->subpass->color_attachments[i]; + if (cmd_state->attachments[a].pending_clear_aspects) { + return true; + } + } + + if (ds != VK_ATTACHMENT_UNUSED && + cmd_state->attachments[ds].pending_clear_aspects) { + return true; + } + + return false; +} + +/** + * Emit any pending attachment clears for the current subpass. + * + * @see anv_attachment_state::pending_clear_aspects + */ +void +anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_cmd_state *cmd_state = &cmd_buffer->state; + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + struct anv_meta_saved_state saved_state; + + if (!subpass_needs_clear(cmd_buffer)) + return; + + meta_clear_begin(&saved_state, cmd_buffer); + + if (cmd_state->framebuffer->layers > 1) + anv_finishme("clearing multi-layer framebuffer"); + + VkClearRect clear_rect = { + .rect = { + .offset = { 0, 0 }, + .extent = { fb->width, fb->height }, + }, + .baseArrayLayer = 0, + .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */ + }; + + for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { + uint32_t a = cmd_state->subpass->color_attachments[i]; + + if (!cmd_state->attachments[a].pending_clear_aspects) + continue; + + assert(cmd_state->attachments[a].pending_clear_aspects == + VK_IMAGE_ASPECT_COLOR_BIT); + + VkClearAttachment clear_att = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .colorAttachment = i, /* Use attachment index relative to subpass */ + .clearValue = cmd_state->attachments[a].clear_value, + }; + + emit_clear(cmd_buffer, &clear_att, &clear_rect); + cmd_state->attachments[a].pending_clear_aspects = 0; + } + + uint32_t ds = cmd_state->subpass->depth_stencil_attachment; + + if (ds != VK_ATTACHMENT_UNUSED && + cmd_state->attachments[ds].pending_clear_aspects) { + + VkClearAttachment clear_att = { + .aspectMask = cmd_state->attachments[ds].pending_clear_aspects, + .clearValue = cmd_state->attachments[ds].clear_value, + }; + + emit_clear(cmd_buffer, &clear_att, &clear_rect); + cmd_state->attachments[ds].pending_clear_aspects = 0; + } + + meta_clear_end(&saved_state, cmd_buffer); +} + +static void +anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer, + struct anv_image *image, + VkImageLayout image_layout, + const VkClearValue *clear_value, + uint32_t range_count, + const VkImageSubresourceRange *ranges) +{ + VkDevice device_h = anv_device_to_handle(cmd_buffer->device); + + for (uint32_t r = 0; r < range_count; r++) { + const VkImageSubresourceRange *range = &ranges[r]; + + for (uint32_t l = 0; l < range->levelCount; ++l) { + for (uint32_t s = 0; s < range->layerCount; ++s) { + struct anv_image_view iview; + anv_image_view_init(&iview, 
cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(image), + .viewType = anv_meta_get_view_type(image), + .format = image->vk_format, + .subresourceRange = { + .aspectMask = range->aspectMask, + .baseMipLevel = range->baseMipLevel + l, + .levelCount = 1, + .baseArrayLayer = range->baseArrayLayer + s, + .layerCount = 1 + }, + }, + cmd_buffer, 0); + + VkFramebuffer fb; + anv_CreateFramebuffer(device_h, + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(&iview), + }, + .width = iview.extent.width, + .height = iview.extent.height, + .layers = 1 + }, + &cmd_buffer->pool->alloc, + &fb); + + VkAttachmentDescription att_desc = { + .format = iview.vk_format, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = image_layout, + .finalLayout = image_layout, + }; + + VkSubpassDescription subpass_desc = { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 0, + .pColorAttachments = NULL, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = NULL, + .preserveAttachmentCount = 0, + .pPreserveAttachments = NULL, + }; + + const VkAttachmentReference att_ref = { + .attachment = 0, + .layout = image_layout, + }; + + if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { + subpass_desc.colorAttachmentCount = 1; + subpass_desc.pColorAttachments = &att_ref; + } else { + subpass_desc.pDepthStencilAttachment = &att_ref; + } + + VkRenderPass pass; + anv_CreateRenderPass(device_h, + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &att_desc, + .subpassCount = 1, + .pSubpasses = &subpass_desc, + }, + &cmd_buffer->pool->alloc, + &pass); + + ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderArea = { + .offset = { 0, 0, }, + .extent = { + .width = iview.extent.width, + .height = iview.extent.height, + }, + }, + .renderPass = pass, + .framebuffer = fb, + .clearValueCount = 0, + .pClearValues = NULL, + }, + VK_SUBPASS_CONTENTS_INLINE); + + VkClearAttachment clear_att = { + .aspectMask = range->aspectMask, + .colorAttachment = 0, + .clearValue = *clear_value, + }; + + VkClearRect clear_rect = { + .rect = { + .offset = { 0, 0 }, + .extent = { iview.extent.width, iview.extent.height }, + }, + .baseArrayLayer = range->baseArrayLayer, + .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */ + }; + + emit_clear(cmd_buffer, &clear_att, &clear_rect); + + ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); + ANV_CALL(DestroyRenderPass)(device_h, pass, + &cmd_buffer->pool->alloc); + ANV_CALL(DestroyFramebuffer)(device_h, fb, + &cmd_buffer->pool->alloc); + } + } + } +} + +void anv_CmdClearColorImage( + VkCommandBuffer commandBuffer, + VkImage image_h, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, image, image_h); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + anv_cmd_clear_image(cmd_buffer, image, 
imageLayout, + (const VkClearValue *) pColor, + rangeCount, pRanges); + + meta_clear_end(&saved_state, cmd_buffer); +} + +void anv_CmdClearDepthStencilImage( + VkCommandBuffer commandBuffer, + VkImage image_h, + VkImageLayout imageLayout, + const VkClearDepthStencilValue* pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_image, image, image_h); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + anv_cmd_clear_image(cmd_buffer, image, imageLayout, + (const VkClearValue *) pDepthStencil, + rangeCount, pRanges); + + meta_clear_end(&saved_state, cmd_buffer); +} + +void anv_CmdClearAttachments( + VkCommandBuffer commandBuffer, + uint32_t attachmentCount, + const VkClearAttachment* pAttachments, + uint32_t rectCount, + const VkClearRect* pRects) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + /* FINISHME: We can do better than this dumb loop. It thrashes too much + * state. + */ + for (uint32_t a = 0; a < attachmentCount; ++a) { + for (uint32_t r = 0; r < rectCount; ++r) { + emit_clear(cmd_buffer, &pAttachments[a], &pRects[r]); + } + } + + meta_clear_end(&saved_state, cmd_buffer); +} + +static void +do_buffer_fill(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *dest, uint64_t dest_offset, + int width, int height, VkFormat fill_format, uint32_t data) +{ + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + + VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = fill_format, + .extent = { + .width = width, + .height = height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }; + + VkImage dest_image; + image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + anv_CreateImage(vk_device, &image_info, + &cmd_buffer->pool->alloc, &dest_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. 
+ */ + anv_image_from_handle(dest_image)->bo = dest; + anv_image_from_handle(dest_image)->offset = dest_offset; + + const VkClearValue clear_value = { + .color = { + .uint32 = { data, data, data, data } + } + }; + + const VkImageSubresourceRange range = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }; + + anv_cmd_clear_image(cmd_buffer, anv_image_from_handle(dest_image), + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + &clear_value, 1, &range); +} + +void anv_CmdFillBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize fillSize, + uint32_t data) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); + struct anv_meta_saved_state saved_state; + + meta_clear_begin(&saved_state, cmd_buffer); + + VkFormat format; + int bs; + if ((fillSize & 15) == 0 && (dstOffset & 15) == 0) { + format = VK_FORMAT_R32G32B32A32_UINT; + bs = 16; + } else if ((fillSize & 7) == 0 && (dstOffset & 15) == 0) { + format = VK_FORMAT_R32G32_UINT; + bs = 8; + } else { + assert((fillSize & 3) == 0 && (dstOffset & 3) == 0); + format = VK_FORMAT_R32_UINT; + bs = 4; + } + + /* This is maximum possible width/height our HW can handle */ + const uint64_t max_surface_dim = 1 << 14; + + /* First, we make a bunch of max-sized copies */ + const uint64_t max_fill_size = max_surface_dim * max_surface_dim * bs; + while (fillSize > max_fill_size) { + do_buffer_fill(cmd_buffer, dst_buffer->bo, + dst_buffer->offset + dstOffset, + max_surface_dim, max_surface_dim, format, data); + fillSize -= max_fill_size; + dstOffset += max_fill_size; + } + + uint64_t height = fillSize / (max_surface_dim * bs); + assert(height < max_surface_dim); + if (height != 0) { + const uint64_t rect_fill_size = height * max_surface_dim * bs; + do_buffer_fill(cmd_buffer, dst_buffer->bo, + dst_buffer->offset + dstOffset, + max_surface_dim, height, format, data); + fillSize -= rect_fill_size; + dstOffset += rect_fill_size; + } + + if (fillSize != 0) { + do_buffer_fill(cmd_buffer, dst_buffer->bo, + dst_buffer->offset + dstOffset, + fillSize / bs, 1, format, data); + } + + meta_clear_end(&saved_state, cmd_buffer); +} diff --git a/src/intel/vulkan/anv_meta_resolve.c b/src/intel/vulkan/anv_meta_resolve.c new file mode 100644 index 0000000..ea5020c --- /dev/null +++ b/src/intel/vulkan/anv_meta_resolve.c @@ -0,0 +1,867 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include + +#include "anv_meta.h" +#include "anv_private.h" +#include "nir/nir_builder.h" + +/** + * Vertex attributes used by all pipelines. + */ +struct vertex_attrs { + struct anv_vue_header vue_header; + float position[2]; /**< 3DPRIM_RECTLIST */ + float tex_position[2]; +}; + +static void +meta_resolve_save(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_meta_save(saved_state, cmd_buffer, + (1 << VK_DYNAMIC_STATE_VIEWPORT) | + (1 << VK_DYNAMIC_STATE_SCISSOR)); + + cmd_buffer->state.dynamic.viewport.count = 0; + cmd_buffer->state.dynamic.scissor.count = 0; +} + +static void +meta_resolve_restore(struct anv_meta_saved_state *saved_state, + struct anv_cmd_buffer *cmd_buffer) +{ + anv_meta_restore(saved_state, cmd_buffer); +} + +static VkPipeline * +get_pipeline_h(struct anv_device *device, uint32_t samples) +{ + uint32_t i = ffs(samples) - 2; /* log2(samples) - 1 */ + + assert(samples >= 2); + assert(i < ARRAY_SIZE(device->meta_state.resolve.pipelines)); + + return &device->meta_state.resolve.pipelines[i]; +} + +static nir_shader * +build_nir_vs(void) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + + nir_builder b; + nir_variable *a_position; + nir_variable *v_position; + nir_variable *a_tex_position; + nir_variable *v_tex_position; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_resolve_vs"); + + a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "a_position"); + a_position->data.location = VERT_ATTRIB_GENERIC0; + + v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, + "gl_Position"); + v_position->data.location = VARYING_SLOT_POS; + + a_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "a_tex_position"); + a_tex_position->data.location = VERT_ATTRIB_GENERIC1; + + v_tex_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, + "v_tex_position"); + v_tex_position->data.location = VARYING_SLOT_VAR0; + + nir_copy_var(&b, v_position, a_position); + nir_copy_var(&b, v_tex_position, a_tex_position); + + return b.shader; +} + +static nir_shader * +build_nir_fs(uint32_t num_samples) +{ + const struct glsl_type *vec4 = glsl_vec4_type(); + + const struct glsl_type *sampler2DMS = + glsl_sampler_type(GLSL_SAMPLER_DIM_MS, + /*is_shadow*/ false, + /*is_array*/ false, + GLSL_TYPE_FLOAT); + + nir_builder b; + nir_variable *u_tex; /* uniform sampler */ + nir_variable *v_position; /* vec4, varying fragment position */ + nir_variable *v_tex_position; /* vec4, varying texture coordinate */ + nir_variable *f_color; /* vec4, fragment output color */ + nir_ssa_def *accum; /* vec4, accumulation of sample values */ + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); + b.shader->info.name = ralloc_asprintf(b.shader, + "meta_resolve_fs_samples%02d", + num_samples); + + u_tex = nir_variable_create(b.shader, nir_var_uniform, sampler2DMS, + "u_tex"); + u_tex->data.descriptor_set = 0; + u_tex->data.binding = 0; + + v_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, + "v_position"); + v_position->data.location = VARYING_SLOT_POS; + v_position->data.origin_upper_left = true; + + v_tex_position = 
nir_variable_create(b.shader, nir_var_shader_in, vec4, + "v_tex_position"); + v_tex_position->data.location = VARYING_SLOT_VAR0; + + f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4, + "f_color"); + f_color->data.location = FRAG_RESULT_DATA0; + + accum = nir_imm_vec4(&b, 0, 0, 0, 0); + + nir_ssa_def *tex_position_ivec = + nir_f2i(&b, nir_load_var(&b, v_tex_position)); + + for (uint32_t i = 0; i < num_samples; ++i) { + nir_tex_instr *tex; + + tex = nir_tex_instr_create(b.shader, /*num_srcs*/ 2); + tex->texture = nir_deref_var_create(tex, u_tex); + tex->sampler = nir_deref_var_create(tex, u_tex); + tex->sampler_dim = GLSL_SAMPLER_DIM_MS; + tex->op = nir_texop_txf_ms; + tex->src[0].src = nir_src_for_ssa(tex_position_ivec); + tex->src[0].src_type = nir_tex_src_coord; + tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i)); + tex->src[1].src_type = nir_tex_src_ms_index; + tex->dest_type = nir_type_float; + tex->is_array = false; + tex->coord_components = 3; + nir_ssa_dest_init(&tex->instr, &tex->dest, /*num_components*/ 4, "tex"); + nir_builder_instr_insert(&b, &tex->instr); + + accum = nir_fadd(&b, accum, &tex->dest.ssa); + } + + accum = nir_fdiv(&b, accum, nir_imm_float(&b, num_samples)); + nir_store_var(&b, f_color, accum, /*writemask*/ 4); + + return b.shader; +} + +static VkResult +create_pass(struct anv_device *device) +{ + VkResult result; + VkDevice device_h = anv_device_to_handle(device); + const VkAllocationCallbacks *alloc = &device->meta_state.alloc; + + result = anv_CreateRenderPass(device_h, + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ + .samples = 1, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .colorAttachmentCount = 1, + .pColorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .pResolveAttachments = NULL, + .pDepthStencilAttachment = &(VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + }, + .preserveAttachmentCount = 0, + .pPreserveAttachments = NULL, + }, + .dependencyCount = 0, + }, + alloc, + &device->meta_state.resolve.pass); + + return result; +} + +static VkResult +create_pipeline(struct anv_device *device, + uint32_t num_samples, + VkShaderModule vs_module_h) +{ + VkResult result; + VkDevice device_h = anv_device_to_handle(device); + + struct anv_shader_module fs_module = { + .nir = build_nir_fs(num_samples), + }; + + if (!fs_module.nir) { + /* XXX: Need more accurate error */ + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto cleanup; + } + + result = anv_graphics_pipeline_create(device_h, + VK_NULL_HANDLE, + &(VkGraphicsPipelineCreateInfo) { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = 2, + .pStages = (VkPipelineShaderStageCreateInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = vs_module_h, + .pName = "main", + }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = anv_shader_module_to_handle(&fs_module), + .pName = "main", + }, + }, + .pVertexInputState = 
&(VkPipelineVertexInputStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .stride = sizeof(struct vertex_attrs), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX + }, + }, + .vertexAttributeDescriptionCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offset = offsetof(struct vertex_attrs, vue_header), + }, + { + /* Position */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct vertex_attrs, position), + }, + { + /* Texture Coordinate */ + .location = 2, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(struct vertex_attrs, tex_position), + }, + }, + }, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, + .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .depthClampEnable = false, + .rasterizerDiscardEnable = false, + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, + }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = 1, + .sampleShadingEnable = false, + .pSampleMask = (VkSampleMask[]) { 0x1 }, + .alphaToCoverageEnable = false, + .alphaToOneEnable = false, + }, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .logicOpEnable = false, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | + VK_COLOR_COMPONENT_A_BIT, + }, + }, + }, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 2, + .pDynamicStates = (VkDynamicState[]) { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + }, + }, + .layout = device->meta_state.resolve.pipeline_layout, + .renderPass = device->meta_state.resolve.pass, + .subpass = 0, + }, + &(struct anv_graphics_pipeline_create_info) { + .color_attachment_count = -1, + .use_repclear = false, + .disable_viewport = true, + .disable_scissor = true, + .disable_vs = true, + .use_rectlist = true + }, + &device->meta_state.alloc, + get_pipeline_h(device, num_samples)); + if (result != VK_SUCCESS) + goto cleanup; + + goto cleanup; + +cleanup: + ralloc_free(fs_module.nir); + return result; +} + +void +anv_device_finish_meta_resolve_state(struct anv_device *device) +{ + struct anv_meta_state *state = &device->meta_state; + VkDevice device_h = anv_device_to_handle(device); + VkRenderPass pass_h = device->meta_state.resolve.pass; + VkPipelineLayout pipeline_layout_h = device->meta_state.resolve.pipeline_layout; + VkDescriptorSetLayout ds_layout_h = device->meta_state.resolve.ds_layout; + const 
VkAllocationCallbacks *alloc = &device->meta_state.alloc; + + if (pass_h) + ANV_CALL(DestroyRenderPass)(device_h, pass_h, + &device->meta_state.alloc); + + if (pipeline_layout_h) + ANV_CALL(DestroyPipelineLayout)(device_h, pipeline_layout_h, alloc); + + if (ds_layout_h) + ANV_CALL(DestroyDescriptorSetLayout)(device_h, ds_layout_h, alloc); + + for (uint32_t i = 0; i < ARRAY_SIZE(state->resolve.pipelines); ++i) { + VkPipeline pipeline_h = state->resolve.pipelines[i]; + + if (pipeline_h) { + ANV_CALL(DestroyPipeline)(device_h, pipeline_h, alloc); + } + } +} + +VkResult +anv_device_init_meta_resolve_state(struct anv_device *device) +{ + VkResult res = VK_SUCCESS; + VkDevice device_h = anv_device_to_handle(device); + const VkAllocationCallbacks *alloc = &device->meta_state.alloc; + + const isl_sample_count_mask_t sample_count_mask = + isl_device_get_sample_counts(&device->isl_dev); + + zero(device->meta_state.resolve); + + struct anv_shader_module vs_module = { .nir = build_nir_vs() }; + if (!vs_module.nir) { + /* XXX: Need more accurate error */ + res = VK_ERROR_OUT_OF_HOST_MEMORY; + goto fail; + } + + VkShaderModule vs_module_h = anv_shader_module_to_handle(&vs_module); + + res = anv_CreateDescriptorSetLayout(device_h, + &(VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + }, + }, + }, + alloc, + &device->meta_state.resolve.ds_layout); + if (res != VK_SUCCESS) + goto fail; + + res = anv_CreatePipelineLayout(device_h, + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = (VkDescriptorSetLayout[]) { + device->meta_state.resolve.ds_layout, + }, + }, + alloc, + &device->meta_state.resolve.pipeline_layout); + if (res != VK_SUCCESS) + goto fail; + + res = create_pass(device); + if (res != VK_SUCCESS) + goto fail; + + for (uint32_t i = 0; + i < ARRAY_SIZE(device->meta_state.resolve.pipelines); ++i) { + + uint32_t sample_count = 1 << (1 + i); + if (!(sample_count_mask & sample_count)) + continue; + + res = create_pipeline(device, sample_count, vs_module_h); + if (res != VK_SUCCESS) + goto fail; + } + + goto cleanup; + +fail: + anv_device_finish_meta_resolve_state(device); + +cleanup: + ralloc_free(vs_module.nir); + + return res; +} + +static void +emit_resolve(struct anv_cmd_buffer *cmd_buffer, + struct anv_image_view *src_iview, + const VkOffset2D *src_offset, + struct anv_image_view *dest_iview, + const VkOffset2D *dest_offset, + const VkExtent2D *resolve_extent) +{ + struct anv_device *device = cmd_buffer->device; + VkDevice device_h = anv_device_to_handle(device); + VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const struct anv_image *src_image = src_iview->image; + VkDescriptorPool dummy_desc_pool_h = (VkDescriptorPool) 1; + + const struct vertex_attrs vertex_data[3] = { + { + .vue_header = {0}, + .position = { + dest_offset->x + resolve_extent->width, + dest_offset->y + resolve_extent->height, + }, + .tex_position = { + src_offset->x + resolve_extent->width, + src_offset->y + resolve_extent->height, + }, + }, + { + .vue_header = {0}, + .position = { + dest_offset->x, + dest_offset->y + resolve_extent->height, + }, + .tex_position = { + src_offset->x, + 
src_offset->y + resolve_extent->height, + }, + }, + { + .vue_header = {0}, + .position = { + dest_offset->x, + dest_offset->y, + }, + .tex_position = { + src_offset->x, + src_offset->y, + }, + }, + }; + + struct anv_state vertex_mem = + anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, + sizeof(vertex_data), 16); + + struct anv_buffer vertex_buffer = { + .device = device, + .size = sizeof(vertex_data), + .bo = &cmd_buffer->dynamic_state_stream.block_pool->bo, + .offset = vertex_mem.offset, + }; + + VkBuffer vertex_buffer_h = anv_buffer_to_handle(&vertex_buffer); + + anv_CmdBindVertexBuffers(cmd_buffer_h, + /*firstBinding*/ 0, + /*bindingCount*/ 1, + (VkBuffer[]) { vertex_buffer_h }, + (VkDeviceSize[]) { 0 }); + + VkSampler sampler_h; + ANV_CALL(CreateSampler)(device_h, + &(VkSamplerCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = VK_FILTER_NEAREST, + .minFilter = VK_FILTER_NEAREST, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + .mipLodBias = 0.0, + .anisotropyEnable = false, + .compareEnable = false, + .minLod = 0.0, + .maxLod = 0.0, + .unnormalizedCoordinates = false, + }, + &cmd_buffer->pool->alloc, + &sampler_h); + + VkDescriptorSet desc_set_h; + anv_AllocateDescriptorSets(device_h, + &(VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = dummy_desc_pool_h, + .descriptorSetCount = 1, + .pSetLayouts = (VkDescriptorSetLayout[]) { + device->meta_state.resolve.ds_layout, + }, + }, + &desc_set_h); + + ANV_FROM_HANDLE(anv_descriptor_set, desc_set, desc_set_h); + + anv_UpdateDescriptorSets(device_h, + /*writeCount*/ 1, + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = desc_set_h, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = sampler_h, + .imageView = anv_image_view_to_handle(src_iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + }, + }, + }, + /*copyCount*/ 0, + /*copies */ NULL); + + ANV_CALL(CmdSetViewport)(cmd_buffer_h, + /*firstViewport*/ 0, + /*viewportCount*/ 1, + (VkViewport[]) { + { + .x = 0, + .y = 0, + .width = fb->width, + .height = fb->height, + .minDepth = 0.0, + .maxDepth = 1.0, + }, + }); + + ANV_CALL(CmdSetScissor)(cmd_buffer_h, + /*firstScissor*/ 0, + /*scissorCount*/ 1, + (VkRect2D[]) { + { + .offset = { 0, 0 }, + .extent = (VkExtent2D) { fb->width, fb->height }, + }, + }); + + VkPipeline pipeline_h = *get_pipeline_h(device, src_image->samples); + ANV_FROM_HANDLE(anv_pipeline, pipeline, pipeline_h); + + if (cmd_buffer->state.pipeline != pipeline) { + anv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_h); + } + + anv_CmdBindDescriptorSets(cmd_buffer_h, + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.resolve.pipeline_layout, + /*firstSet*/ 0, + /* setCount */ 1, + (VkDescriptorSet[]) { + desc_set_h, + }, + /*copyCount*/ 0, + /*copies */ NULL); + + ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); + + /* All objects below are consumed by the draw call. We may safely destroy + * them. 
+ */ + anv_descriptor_set_destroy(device, desc_set); + anv_DestroySampler(device_h, sampler_h, + &cmd_buffer->pool->alloc); +} + +void anv_CmdResolveImage( + VkCommandBuffer cmd_buffer_h, + VkImage src_image_h, + VkImageLayout src_image_layout, + VkImage dest_image_h, + VkImageLayout dest_image_layout, + uint32_t region_count, + const VkImageResolve* regions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmd_buffer_h); + ANV_FROM_HANDLE(anv_image, src_image, src_image_h); + ANV_FROM_HANDLE(anv_image, dest_image, dest_image_h); + struct anv_device *device = cmd_buffer->device; + struct anv_meta_saved_state state; + VkDevice device_h = anv_device_to_handle(device); + + meta_resolve_save(&state, cmd_buffer); + + assert(src_image->samples > 1); + assert(dest_image->samples == 1); + + if (src_image->samples >= 16) { + /* See commit aa3f9aaf31e9056a255f9e0472ebdfdaa60abe54 for the + * glBlitFramebuffer workaround for samples >= 16. + */ + anv_finishme("vkCmdResolveImage: need interpolation workaround when " + "samples >= 16"); + } + + if (src_image->array_size > 1) + anv_finishme("vkCmdResolveImage: multisample array images"); + + for (uint32_t r = 0; r < region_count; ++r) { + const VkImageResolve *region = ®ions[r]; + + /* From the Vulkan 1.0 spec: + * + * - The aspectMask member of srcSubresource and dstSubresource must + * only contain VK_IMAGE_ASPECT_COLOR_BIT + * + * - The layerCount member of srcSubresource and dstSubresource must + * match + */ + assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); + assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); + assert(region->srcSubresource.layerCount == + region->dstSubresource.layerCount); + + const uint32_t src_base_layer = + anv_meta_get_iview_layer(src_image, ®ion->srcSubresource, + ®ion->srcOffset); + + const uint32_t dest_base_layer = + anv_meta_get_iview_layer(dest_image, ®ion->dstSubresource, + ®ion->dstOffset); + + for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; + ++layer) { + + struct anv_image_view src_iview; + anv_image_view_init(&src_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = src_image_h, + .viewType = anv_meta_get_view_type(src_image), + .format = src_image->format->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = region->srcSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = src_base_layer + layer, + .layerCount = 1, + }, + }, + cmd_buffer, 0); + + struct anv_image_view dest_iview; + anv_image_view_init(&dest_iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = dest_image_h, + .viewType = anv_meta_get_view_type(dest_image), + .format = dest_image->format->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = region->dstSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = dest_base_layer + layer, + .layerCount = 1, + }, + }, + cmd_buffer, 0); + + VkFramebuffer fb_h; + anv_CreateFramebuffer(device_h, + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkImageView[]) { + anv_image_view_to_handle(&dest_iview), + }, + .width = anv_minify(dest_image->extent.width, + region->dstSubresource.mipLevel), + .height = anv_minify(dest_image->extent.height, + region->dstSubresource.mipLevel), + .layers = 1 + }, + &cmd_buffer->pool->alloc, + &fb_h); + + 
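+      /* One render-pass instance per (region, layer) pair: the throwaway
+       * framebuffer created above wraps the single-layer destination view,
+       * the shared meta resolve pass (created once in create_pass) is begun
+       * over the destination rectangle, and emit_resolve() draws a rectlist
+       * whose fragment shader averages the source samples (see build_nir_fs).
+       * The framebuffer is destroyed again right after CmdEndRenderPass.
+       */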
ANV_CALL(CmdBeginRenderPass)(cmd_buffer_h, + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = device->meta_state.resolve.pass, + .framebuffer = fb_h, + .renderArea = { + .offset = { + region->dstOffset.x, + region->dstOffset.y, + }, + .extent = { + region->extent.width, + region->extent.height, + } + }, + .clearValueCount = 0, + .pClearValues = NULL, + }, + VK_SUBPASS_CONTENTS_INLINE); + + emit_resolve(cmd_buffer, + &src_iview, + &(VkOffset2D) { + .x = region->srcOffset.x, + .y = region->srcOffset.y, + }, + &dest_iview, + &(VkOffset2D) { + .x = region->dstOffset.x, + .y = region->dstOffset.y, + }, + &(VkExtent2D) { + .width = region->extent.width, + .height = region->extent.height, + }); + + ANV_CALL(CmdEndRenderPass)(cmd_buffer_h); + + anv_DestroyFramebuffer(device_h, fb_h, + &cmd_buffer->pool->alloc); + } + } + + meta_resolve_restore(&state, cmd_buffer); +} + +/** + * Emit any needed resolves for the current subpass. + */ +void +anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + struct anv_subpass *subpass = cmd_buffer->state.subpass; + struct anv_meta_saved_state saved_state; + + /* FINISHME(perf): Skip clears for resolve attachments. + * + * From the Vulkan 1.0 spec: + * + * If the first use of an attachment in a render pass is as a resolve + * attachment, then the loadOp is effectively ignored as the resolve is + * guaranteed to overwrite all pixels in the render area. + */ + + if (!subpass->has_resolve) + return; + + meta_resolve_save(&saved_state, cmd_buffer); + + for (uint32_t i = 0; i < subpass->color_count; ++i) { + uint32_t src_att = subpass->color_attachments[i]; + uint32_t dest_att = subpass->resolve_attachments[i]; + + if (dest_att == VK_ATTACHMENT_UNUSED) + continue; + + struct anv_image_view *src_iview = fb->attachments[src_att]; + struct anv_image_view *dest_iview = fb->attachments[dest_att]; + + struct anv_subpass resolve_subpass = { + .color_count = 1, + .color_attachments = (uint32_t[]) { dest_att }, + .depth_stencil_attachment = VK_ATTACHMENT_UNUSED, + }; + + anv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass); + + /* Subpass resolves must respect the render area. We can ignore the + * render area here because vkCmdBeginRenderPass set the render area + * with 3DSTATE_DRAWING_RECTANGLE. + * + * XXX(chadv): Does the hardware really respect + * 3DSTATE_DRAWING_RECTANGLE when draing a 3DPRIM_RECTLIST? 
+ */ + emit_resolve(cmd_buffer, + src_iview, + &(VkOffset2D) { 0, 0 }, + dest_iview, + &(VkOffset2D) { 0, 0 }, + &(VkExtent2D) { fb->width, fb->height }); + } + + cmd_buffer->state.subpass = subpass; + meta_resolve_restore(&saved_state, cmd_buffer); +} diff --git a/src/intel/vulkan/anv_nir.h b/src/intel/vulkan/anv_nir.h new file mode 100644 index 0000000..a7ea3eb --- /dev/null +++ b/src/intel/vulkan/anv_nir.h @@ -0,0 +1,44 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include "nir/nir.h" +#include "anv_private.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar); + +void anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, + nir_shader *shader, + struct brw_stage_prog_data *prog_data); +void anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, + nir_shader *shader, + struct brw_stage_prog_data *prog_data); + +#ifdef __cplusplus +} +#endif diff --git a/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c b/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c new file mode 100644 index 0000000..e71a8ff --- /dev/null +++ b/src/intel/vulkan/anv_nir_apply_dynamic_offsets.c @@ -0,0 +1,171 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_nir.h" +#include "nir/nir_builder.h" + +struct apply_dynamic_offsets_state { + nir_shader *shader; + nir_builder builder; + + const struct anv_pipeline_layout *layout; + + uint32_t indices_start; +}; + +static bool +apply_dynamic_offsets_block(nir_block *block, void *void_state) +{ + struct apply_dynamic_offsets_state *state = void_state; + struct anv_descriptor_set_layout *set_layout; + + nir_builder *b = &state->builder; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + unsigned block_idx_src; + switch (intrin->intrinsic) { + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_ssbo: + block_idx_src = 0; + break; + case nir_intrinsic_store_ssbo: + block_idx_src = 1; + break; + default: + continue; /* the loop */ + } + + nir_instr *res_instr = intrin->src[block_idx_src].ssa->parent_instr; + assert(res_instr->type == nir_instr_type_intrinsic); + nir_intrinsic_instr *res_intrin = nir_instr_as_intrinsic(res_instr); + assert(res_intrin->intrinsic == nir_intrinsic_vulkan_resource_index); + + unsigned set = res_intrin->const_index[0]; + unsigned binding = res_intrin->const_index[1]; + + set_layout = state->layout->set[set].layout; + if (set_layout->binding[binding].dynamic_offset_index < 0) + continue; + + b->cursor = nir_before_instr(&intrin->instr); + + /* First, we need to generate the uniform load for the buffer offset */ + uint32_t index = state->layout->set[set].dynamic_offset_start + + set_layout->binding[binding].dynamic_offset_index; + + nir_intrinsic_instr *offset_load = + nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform); + offset_load->num_components = 2; + offset_load->const_index[0] = state->indices_start + index * 8; + offset_load->src[0] = nir_src_for_ssa(nir_imul(b, res_intrin->src[0].ssa, + nir_imm_int(b, 8))); + + nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, NULL); + nir_builder_instr_insert(b, &offset_load->instr); + + nir_src *offset_src = nir_get_io_offset_src(intrin); + nir_ssa_def *new_offset = nir_iadd(b, offset_src->ssa, + &offset_load->dest.ssa); + + /* In order to avoid out-of-bounds access, we predicate */ + nir_ssa_def *pred = nir_uge(b, nir_channel(b, &offset_load->dest.ssa, 1), + offset_src->ssa); + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = nir_src_for_ssa(pred); + nir_cf_node_insert(b->cursor, &if_stmt->cf_node); + + nir_instr_remove(&intrin->instr); + *offset_src = nir_src_for_ssa(new_offset); + nir_instr_insert_after_cf_list(&if_stmt->then_list, &intrin->instr); + + if (intrin->intrinsic != nir_intrinsic_store_ssbo) { + /* It's a load, we need a phi node */ + nir_phi_instr *phi = nir_phi_instr_create(b->shader); + nir_ssa_dest_init(&phi->instr, &phi->dest, + intrin->num_components, NULL); + + nir_phi_src *src1 = ralloc(phi, nir_phi_src); + struct exec_node *tnode = exec_list_get_tail(&if_stmt->then_list); + src1->pred = exec_node_data(nir_block, tnode, cf_node.node); + src1->src = nir_src_for_ssa(&intrin->dest.ssa); + exec_list_push_tail(&phi->srcs, &src1->node); + + b->cursor = nir_after_cf_list(&if_stmt->else_list); + nir_ssa_def *zero = nir_build_imm(b, intrin->num_components, + (nir_const_value) { .u = { 0, 0, 0, 0 } }); + + nir_phi_src *src2 = ralloc(phi, nir_phi_src); + struct exec_node *enode = exec_list_get_tail(&if_stmt->else_list); + src2->pred = exec_node_data(nir_block, enode, cf_node.node); + src2->src = nir_src_for_ssa(zero); + 
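+         /* This zero vector is the phi source for the else block: a load whose
+          * dynamic-offset bounds check fails produces zero rather than reading
+          * out of bounds. The rewrite_uses call below then points every
+          * original user of the load at the phi result instead.
+          */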
exec_list_push_tail(&phi->srcs, &src2->node); + + assert(intrin->dest.is_ssa); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&phi->dest.ssa)); + + nir_instr_insert_after_cf(&if_stmt->cf_node, &phi->instr); + } + } + + return true; +} + +void +anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, + nir_shader *shader, + struct brw_stage_prog_data *prog_data) +{ + struct apply_dynamic_offsets_state state = { + .shader = shader, + .layout = pipeline->layout, + .indices_start = shader->num_uniforms, + }; + + if (!state.layout || !state.layout->stage[shader->stage].has_dynamic_offsets) + return; + + nir_foreach_function(shader, function) { + if (function->impl) { + nir_builder_init(&state.builder, function->impl); + nir_foreach_block(function->impl, apply_dynamic_offsets_block, &state); + nir_metadata_preserve(function->impl, nir_metadata_block_index | + nir_metadata_dominance); + } + } + + struct anv_push_constants *null_data = NULL; + for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) { + prog_data->param[i * 2 + shader->num_uniforms] = + (const union gl_constant_value *)&null_data->dynamic[i].offset; + prog_data->param[i * 2 + 1 + shader->num_uniforms] = + (const union gl_constant_value *)&null_data->dynamic[i].range; + } + + shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 8; +} diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c new file mode 100644 index 0000000..c58a938 --- /dev/null +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -0,0 +1,394 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_nir.h" +#include "program/prog_parameter.h" +#include "nir/nir_builder.h" + +struct apply_pipeline_layout_state { + nir_shader *shader; + nir_builder builder; + + struct { + BITSET_WORD *used; + uint8_t *surface_offsets; + uint8_t *sampler_offsets; + uint8_t *image_offsets; + } set[MAX_SETS]; +}; + +static void +add_binding(struct apply_pipeline_layout_state *state, + uint32_t set, uint32_t binding) +{ + BITSET_SET(state->set[set].used, binding); +} + +static void +add_var_binding(struct apply_pipeline_layout_state *state, nir_variable *var) +{ + add_binding(state, var->data.descriptor_set, var->data.binding); +} + +static bool +get_used_bindings_block(nir_block *block, void *void_state) +{ + struct apply_pipeline_layout_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + switch (instr->type) { + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + switch (intrin->intrinsic) { + case nir_intrinsic_vulkan_resource_index: + add_binding(state, nir_intrinsic_desc_set(intrin), + nir_intrinsic_binding(intrin)); + break; + + case nir_intrinsic_image_load: + case nir_intrinsic_image_store: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_min: + case nir_intrinsic_image_atomic_max: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_size: + case nir_intrinsic_image_samples: + add_var_binding(state, intrin->variables[0]->var); + break; + + default: + break; + } + break; + } + case nir_instr_type_tex: { + nir_tex_instr *tex = nir_instr_as_tex(instr); + assert(tex->texture); + add_var_binding(state, tex->texture->var); + if (tex->sampler) + add_var_binding(state, tex->sampler->var); + break; + } + default: + continue; + } + } + + return true; +} + +static void +lower_res_index_intrinsic(nir_intrinsic_instr *intrin, + struct apply_pipeline_layout_state *state) +{ + nir_builder *b = &state->builder; + + b->cursor = nir_before_instr(&intrin->instr); + + uint32_t set = nir_intrinsic_desc_set(intrin); + uint32_t binding = nir_intrinsic_binding(intrin); + + uint32_t surface_index = state->set[set].surface_offsets[binding]; + + nir_const_value *const_block_idx = + nir_src_as_const_value(intrin->src[0]); + + nir_ssa_def *block_index; + if (const_block_idx) { + block_index = nir_imm_int(b, surface_index + const_block_idx->u[0]); + } else { + block_index = nir_iadd(b, nir_imm_int(b, surface_index), + nir_ssa_for_src(b, intrin->src[0], 1)); + } + + assert(intrin->dest.is_ssa); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index)); + nir_instr_remove(&intrin->instr); +} + +static void +lower_tex_deref(nir_tex_instr *tex, nir_deref_var *deref, + unsigned *const_index, nir_tex_src_type src_type, + struct apply_pipeline_layout_state *state) +{ + if (deref->deref.child) { + assert(deref->deref.child->deref_type == nir_deref_type_array); + nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child); + + *const_index += deref_array->base_offset; + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src, + tex->num_srcs + 1); + + for (unsigned i = 0; i < tex->num_srcs; i++) { + new_srcs[i].src_type = tex->src[i].src_type; + nir_instr_move_src(&tex->instr, &new_srcs[i].src, &tex->src[i].src); + } + + ralloc_free(tex->src); + 
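+         /* Swap in the enlarged source array; the extra slot appended below
+          * carries the indirect array index as a texture/sampler offset
+          * source so the backend can apply it on top of the base
+          * texture_index/sampler_index computed from the pipeline layout.
+          */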
tex->src = new_srcs; + + /* Now we can go ahead and move the source over to being a + * first-class texture source. + */ + tex->src[tex->num_srcs].src_type = src_type; + tex->num_srcs++; + assert(deref_array->indirect.is_ssa); + nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs - 1].src, + deref_array->indirect); + } + } +} + +static void +cleanup_tex_deref(nir_tex_instr *tex, nir_deref_var *deref) +{ + if (deref->deref.child == NULL) + return; + + nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child); + + if (deref_array->deref_array_type != nir_deref_array_type_indirect) + return; + + nir_instr_rewrite_src(&tex->instr, &deref_array->indirect, NIR_SRC_INIT); +} + +static void +lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) +{ + /* No one should have come by and lowered it already */ + assert(tex->texture); + + unsigned set = tex->texture->var->data.descriptor_set; + unsigned binding = tex->texture->var->data.binding; + tex->texture_index = state->set[set].surface_offsets[binding]; + lower_tex_deref(tex, tex->texture, &tex->texture_index, + nir_tex_src_texture_offset, state); + + if (tex->sampler) { + unsigned set = tex->sampler->var->data.descriptor_set; + unsigned binding = tex->sampler->var->data.binding; + tex->sampler_index = state->set[set].surface_offsets[binding]; + lower_tex_deref(tex, tex->sampler, &tex->sampler_index, + nir_tex_src_sampler_offset, state); + } + + /* The backend only ever uses this to mark used surfaces. We don't care + * about that little optimization so it just needs to be non-zero. + */ + tex->texture_array_size = 1; + + cleanup_tex_deref(tex, tex->texture); + if (tex->sampler) + cleanup_tex_deref(tex, tex->sampler); + tex->texture = NULL; + tex->sampler = NULL; +} + +static bool +apply_pipeline_layout_block(nir_block *block, void *void_state) +{ + struct apply_pipeline_layout_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + switch (instr->type) { + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) { + lower_res_index_intrinsic(intrin, state); + } + break; + } + case nir_instr_type_tex: + lower_tex(nir_instr_as_tex(instr), state); + break; + default: + continue; + } + } + + return true; +} + +static void +setup_vec4_uniform_value(const union gl_constant_value **params, + const union gl_constant_value *values, + unsigned n) +{ + static const gl_constant_value zero = { 0 }; + + for (unsigned i = 0; i < n; ++i) + params[i] = &values[i]; + + for (unsigned i = n; i < 4; ++i) + params[i] = &zero; +} + +void +anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, + nir_shader *shader, + struct brw_stage_prog_data *prog_data) +{ + struct anv_pipeline_layout *layout = pipeline->layout; + + struct apply_pipeline_layout_state state = { + .shader = shader, + }; + + void *mem_ctx = ralloc_context(NULL); + + for (unsigned s = 0; s < layout->num_sets; s++) { + const unsigned count = layout->set[s].layout->binding_count; + const unsigned words = BITSET_WORDS(count); + state.set[s].used = rzalloc_array(mem_ctx, BITSET_WORD, words); + state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count); + state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count); + state.set[s].image_offsets = rzalloc_array(mem_ctx, uint8_t, count); + } + + nir_foreach_function(shader, function) { + if (function->impl) + nir_foreach_block(function->impl, get_used_bindings_block, &state); + } + + 
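+   /* With the used (set, binding) pairs gathered above, the loops below first
+    * size the bind map and then hand out contiguous surface, sampler and image
+    * slots per binding, recording for each surface and sampler slot which
+    * descriptor it maps back to. For example, two used bindings that need
+    * surfaces, with array_size 1 and 3, would occupy surface slots 0 and 1..3
+    * respectively.
+    */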
struct anv_pipeline_bind_map map = { + .surface_count = 0, + .sampler_count = 0, + }; + + for (uint32_t set = 0; set < layout->num_sets; set++) { + struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; + + BITSET_WORD b, _tmp; + BITSET_FOREACH_SET(b, _tmp, state.set[set].used, + set_layout->binding_count) { + if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) + map.surface_count += set_layout->binding[b].array_size; + if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) + map.sampler_count += set_layout->binding[b].array_size; + if (set_layout->binding[b].stage[shader->stage].image_index >= 0) + map.image_count += set_layout->binding[b].array_size; + } + } + + map.surface_to_descriptor = + malloc(map.surface_count * sizeof(struct anv_pipeline_binding)); + map.sampler_to_descriptor = + malloc(map.sampler_count * sizeof(struct anv_pipeline_binding)); + + pipeline->bindings[shader->stage] = map; + + unsigned surface = 0; + unsigned sampler = 0; + unsigned image = 0; + for (uint32_t set = 0; set < layout->num_sets; set++) { + struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; + + BITSET_WORD b, _tmp; + BITSET_FOREACH_SET(b, _tmp, state.set[set].used, + set_layout->binding_count) { + unsigned array_size = set_layout->binding[b].array_size; + unsigned set_offset = set_layout->binding[b].descriptor_index; + + if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) { + state.set[set].surface_offsets[b] = surface; + for (unsigned i = 0; i < array_size; i++) { + map.surface_to_descriptor[surface + i].set = set; + map.surface_to_descriptor[surface + i].offset = set_offset + i; + } + surface += array_size; + } + + if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) { + state.set[set].sampler_offsets[b] = sampler; + for (unsigned i = 0; i < array_size; i++) { + map.sampler_to_descriptor[sampler + i].set = set; + map.sampler_to_descriptor[sampler + i].offset = set_offset + i; + } + sampler += array_size; + } + + if (set_layout->binding[b].stage[shader->stage].image_index >= 0) { + state.set[set].image_offsets[b] = image; + image += array_size; + } + } + } + + nir_foreach_function(shader, function) { + if (function->impl) { + nir_builder_init(&state.builder, function->impl); + nir_foreach_block(function->impl, apply_pipeline_layout_block, &state); + nir_metadata_preserve(function->impl, nir_metadata_block_index | + nir_metadata_dominance); + } + } + + if (map.image_count > 0) { + nir_foreach_variable(var, &shader->uniforms) { + if (glsl_type_is_image(var->type) || + (glsl_type_is_array(var->type) && + glsl_type_is_image(glsl_get_array_element(var->type)))) { + /* Images are represented as uniform push constants and the actual + * information required for reading/writing to/from the image is + * storred in the uniform. 
+ */ + unsigned set = var->data.descriptor_set; + unsigned binding = var->data.binding; + unsigned image_index = state.set[set].image_offsets[binding]; + + var->data.driver_location = shader->num_uniforms + + image_index * BRW_IMAGE_PARAM_SIZE * 4; + } + } + + struct anv_push_constants *null_data = NULL; + const gl_constant_value **param = prog_data->param + shader->num_uniforms; + const struct brw_image_param *image_param = null_data->images; + for (uint32_t i = 0; i < map.image_count; i++) { + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, + (const union gl_constant_value *)&image_param->surface_idx, 1); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, + (const union gl_constant_value *)image_param->offset, 2); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET, + (const union gl_constant_value *)image_param->size, 3); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET, + (const union gl_constant_value *)image_param->stride, 4); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET, + (const union gl_constant_value *)image_param->tiling, 3); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, + (const union gl_constant_value *)image_param->swizzling, 2); + + param += BRW_IMAGE_PARAM_SIZE; + image_param ++; + } + + shader->num_uniforms += map.image_count * BRW_IMAGE_PARAM_SIZE * 4; + } +} diff --git a/src/intel/vulkan/anv_nir_lower_push_constants.c b/src/intel/vulkan/anv_nir_lower_push_constants.c new file mode 100644 index 0000000..53cd3d7 --- /dev/null +++ b/src/intel/vulkan/anv_nir_lower_push_constants.c @@ -0,0 +1,77 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_nir.h" + +struct lower_push_constants_state { + nir_shader *shader; + bool is_scalar; +}; + +static bool +lower_push_constants_block(nir_block *block, void *void_state) +{ + struct lower_push_constants_state *state = void_state; + + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + /* TODO: Handle indirect push constants */ + if (intrin->intrinsic != nir_intrinsic_load_push_constant) + continue; + + /* This wont work for vec4 stages. 
*/ + assert(state->is_scalar); + + assert(intrin->const_index[0] % 4 == 0); + assert(intrin->const_index[1] == 128); + + /* We just turn them into uniform loads with the appropreate offset */ + intrin->intrinsic = nir_intrinsic_load_uniform; + } + + return true; +} + +void +anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar) +{ + struct lower_push_constants_state state = { + .shader = shader, + .is_scalar = is_scalar, + }; + + nir_foreach_function(shader, function) { + if (function->impl) + nir_foreach_block(function->impl, lower_push_constants_block, &state); + } + + assert(shader->num_uniforms % 4 == 0); + if (is_scalar) + shader->num_uniforms /= 4; + else + shader->num_uniforms = DIV_ROUND_UP(shader->num_uniforms, 16); +} diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c new file mode 100644 index 0000000..d07e9fe --- /dev/null +++ b/src/intel/vulkan/anv_pass.c @@ -0,0 +1,160 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" + +VkResult anv_CreateRenderPass( + VkDevice _device, + const VkRenderPassCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_render_pass *pass; + size_t size; + size_t attachments_offset; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); + + size = sizeof(*pass); + size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); + attachments_offset = size; + size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]); + + pass = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pass == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* Clear the subpasses along with the parent pass. This required because + * each array member of anv_subpass must be a valid pointer if not NULL. 
+ */ + memset(pass, 0, size); + pass->attachment_count = pCreateInfo->attachmentCount; + pass->subpass_count = pCreateInfo->subpassCount; + pass->attachments = (void *) pass + attachments_offset; + + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { + struct anv_render_pass_attachment *att = &pass->attachments[i]; + + att->format = anv_format_for_vk_format(pCreateInfo->pAttachments[i].format); + att->samples = pCreateInfo->pAttachments[i].samples; + att->load_op = pCreateInfo->pAttachments[i].loadOp; + att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; + // att->store_op = pCreateInfo->pAttachments[i].storeOp; + // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; + } + + uint32_t subpass_attachment_count = 0, *p; + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; + + subpass_attachment_count += + desc->inputAttachmentCount + + desc->colorAttachmentCount + + /* Count colorAttachmentCount again for resolve_attachments */ + desc->colorAttachmentCount; + } + + pass->subpass_attachments = + anv_alloc2(&device->alloc, pAllocator, + subpass_attachment_count * sizeof(uint32_t), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pass->subpass_attachments == NULL) { + anv_free2(&device->alloc, pAllocator, pass); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + p = pass->subpass_attachments; + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; + struct anv_subpass *subpass = &pass->subpasses[i]; + + subpass->input_count = desc->inputAttachmentCount; + subpass->color_count = desc->colorAttachmentCount; + + if (desc->inputAttachmentCount > 0) { + subpass->input_attachments = p; + p += desc->inputAttachmentCount; + + for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { + subpass->input_attachments[j] + = desc->pInputAttachments[j].attachment; + } + } + + if (desc->colorAttachmentCount > 0) { + subpass->color_attachments = p; + p += desc->colorAttachmentCount; + + for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { + subpass->color_attachments[j] + = desc->pColorAttachments[j].attachment; + } + } + + subpass->has_resolve = false; + if (desc->pResolveAttachments) { + subpass->resolve_attachments = p; + p += desc->colorAttachmentCount; + + for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { + uint32_t a = desc->pResolveAttachments[j].attachment; + subpass->resolve_attachments[j] = a; + if (a != VK_ATTACHMENT_UNUSED) + subpass->has_resolve = true; + } + } + + if (desc->pDepthStencilAttachment) { + subpass->depth_stencil_attachment = + desc->pDepthStencilAttachment->attachment; + } else { + subpass->depth_stencil_attachment = VK_ATTACHMENT_UNUSED; + } + } + + *pRenderPass = anv_render_pass_to_handle(pass); + + return VK_SUCCESS; +} + +void anv_DestroyRenderPass( + VkDevice _device, + VkRenderPass _pass, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_render_pass, pass, _pass); + + anv_free2(&device->alloc, pAllocator, pass->subpass_attachments); + anv_free2(&device->alloc, pAllocator, pass); +} + +void anv_GetRenderAreaGranularity( + VkDevice device, + VkRenderPass renderPass, + VkExtent2D* pGranularity) +{ + *pGranularity = (VkExtent2D) { 1, 1 }; +} diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c new file mode 100644 index 0000000..a7feefb --- /dev/null +++ b/src/intel/vulkan/anv_pipeline.c 
@@ -0,0 +1,1278 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "util/mesa-sha1.h" +#include "anv_private.h" +#include "brw_nir.h" +#include "anv_nir.h" +#include "nir/spirv/nir_spirv.h" + +/* Needed for SWIZZLE macros */ +#include "program/prog_instruction.h" + +// Shader functions + +VkResult anv_CreateShaderModule( + VkDevice _device, + const VkShaderModuleCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkShaderModule* pShaderModule) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_shader_module *module; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + module = anv_alloc2(&device->alloc, pAllocator, + sizeof(*module) + pCreateInfo->codeSize, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (module == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + module->nir = NULL; + module->size = pCreateInfo->codeSize; + memcpy(module->data, pCreateInfo->pCode, module->size); + + _mesa_sha1_compute(module->data, module->size, module->sha1); + + *pShaderModule = anv_shader_module_to_handle(module); + + return VK_SUCCESS; +} + +void anv_DestroyShaderModule( + VkDevice _device, + VkShaderModule _module, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_shader_module, module, _module); + + anv_free2(&device->alloc, pAllocator, module); +} + +#define SPIR_V_MAGIC_NUMBER 0x07230203 + +/* Eventually, this will become part of anv_CreateShader. Unfortunately, + * we can't do that yet because we don't have the ability to copy nir. + */ +static nir_shader * +anv_shader_compile_to_nir(struct anv_device *device, + struct anv_shader_module *module, + const char *entrypoint_name, + gl_shader_stage stage, + const VkSpecializationInfo *spec_info) +{ + if (strcmp(entrypoint_name, "main") != 0) { + anv_finishme("Multiple shaders per module not really supported"); + } + + const struct brw_compiler *compiler = + device->instance->physicalDevice.compiler; + const nir_shader_compiler_options *nir_options = + compiler->glsl_compiler_options[stage].NirOptions; + + nir_shader *nir; + nir_function *entry_point; + if (module->nir) { + /* Some things such as our meta clear/blit code will give us a NIR + * shader directly. 
In that case, we just ignore the SPIR-V entirely + * and just use the NIR shader */ + nir = module->nir; + nir->options = nir_options; + nir_validate_shader(nir); + + assert(exec_list_length(&nir->functions) == 1); + struct exec_node *node = exec_list_get_head(&nir->functions); + entry_point = exec_node_data(nir_function, node, node); + } else { + uint32_t *spirv = (uint32_t *) module->data; + assert(spirv[0] == SPIR_V_MAGIC_NUMBER); + assert(module->size % 4 == 0); + + uint32_t num_spec_entries = 0; + struct nir_spirv_specialization *spec_entries = NULL; + if (spec_info && spec_info->mapEntryCount > 0) { + num_spec_entries = spec_info->mapEntryCount; + spec_entries = malloc(num_spec_entries * sizeof(*spec_entries)); + for (uint32_t i = 0; i < num_spec_entries; i++) { + const uint32_t *data = + spec_info->pData + spec_info->pMapEntries[i].offset; + assert((const void *)(data + 1) <= + spec_info->pData + spec_info->dataSize); + + spec_entries[i].id = spec_info->pMapEntries[i].constantID; + spec_entries[i].data = *data; + } + } + + entry_point = spirv_to_nir(spirv, module->size / 4, + spec_entries, num_spec_entries, + stage, entrypoint_name, nir_options); + nir = entry_point->shader; + assert(nir->stage == stage); + nir_validate_shader(nir); + + free(spec_entries); + + nir_lower_returns(nir); + nir_validate_shader(nir); + + nir_inline_functions(nir); + nir_validate_shader(nir); + + /* Pick off the single entrypoint that we want */ + foreach_list_typed_safe(nir_function, func, node, &nir->functions) { + if (func != entry_point) + exec_node_remove(&func->node); + } + assert(exec_list_length(&nir->functions) == 1); + entry_point->name = ralloc_strdup(entry_point, "main"); + + nir_remove_dead_variables(nir, nir_var_shader_in); + nir_remove_dead_variables(nir, nir_var_shader_out); + nir_remove_dead_variables(nir, nir_var_system_value); + nir_validate_shader(nir); + + nir_lower_outputs_to_temporaries(entry_point->shader, entry_point); + + nir_lower_system_values(nir); + nir_validate_shader(nir); + } + + /* Vulkan uses the separate-shader linking model */ + nir->info.separate_shader = true; + + nir = brw_preprocess_nir(nir, compiler->scalar_stage[stage]); + + nir_shader_gather_info(nir, entry_point->impl); + + uint32_t indirect_mask = 0; + if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput) + indirect_mask |= (1 << nir_var_shader_in); + if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp) + indirect_mask |= 1 << nir_var_local; + + nir_lower_indirect_derefs(nir, indirect_mask); + + return nir; +} + +void anv_DestroyPipeline( + VkDevice _device, + VkPipeline _pipeline, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); + + anv_reloc_list_finish(&pipeline->batch_relocs, + pAllocator ? 
pAllocator : &device->alloc); + if (pipeline->blend_state.map) + anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); + anv_free2(&device->alloc, pAllocator, pipeline); +} + +static const uint32_t vk_to_gen_primitive_type[] = { + [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, +/* [VK_PRIMITIVE_TOPOLOGY_PATCH_LIST] = _3DPRIM_PATCHLIST_1 */ +}; + +static void +populate_sampler_prog_key(const struct brw_device_info *devinfo, + struct brw_sampler_prog_key_data *key) +{ + /* XXX: Handle texture swizzle on HSW- */ + for (int i = 0; i < MAX_SAMPLERS; i++) { + /* Assume color sampler, no swizzling. (Works for BDW+) */ + key->swizzles[i] = SWIZZLE_XYZW; + } +} + +static void +populate_vs_prog_key(const struct brw_device_info *devinfo, + struct brw_vs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); + + /* XXX: Handle vertex input work-arounds */ + + /* XXX: Handle sampler_prog_key */ +} + +static void +populate_gs_prog_key(const struct brw_device_info *devinfo, + struct brw_gs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); +} + +static void +populate_wm_prog_key(const struct brw_device_info *devinfo, + const VkGraphicsPipelineCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra, + struct brw_wm_prog_key *key) +{ + ANV_FROM_HANDLE(anv_render_pass, render_pass, info->renderPass); + + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); + + /* TODO: Fill out key->input_slots_valid */ + + /* Vulkan doesn't specify a default */ + key->high_quality_derivatives = false; + + /* XXX Vulkan doesn't appear to specify */ + key->clamp_fragment_color = false; + + /* Vulkan always specifies upper-left coordinates */ + key->drawable_height = 0; + key->render_to_fbo = false; + + if (extra && extra->color_attachment_count >= 0) { + key->nr_color_regions = extra->color_attachment_count; + } else { + key->nr_color_regions = + render_pass->subpasses[info->subpass].color_count; + } + + key->replicate_alpha = key->nr_color_regions > 1 && + info->pMultisampleState && + info->pMultisampleState->alphaToCoverageEnable; + + if (info->pMultisampleState && info->pMultisampleState->rasterizationSamples > 1) { + /* We should probably pull this out of the shader, but it's fairly + * harmless to compute it and then let dead-code take care of it. 
+ */ + key->persample_shading = info->pMultisampleState->sampleShadingEnable; + if (key->persample_shading) + key->persample_2x = info->pMultisampleState->rasterizationSamples == 2; + + key->compute_pos_offset = info->pMultisampleState->sampleShadingEnable; + key->compute_sample_id = info->pMultisampleState->sampleShadingEnable; + } +} + +static void +populate_cs_prog_key(const struct brw_device_info *devinfo, + struct brw_cs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); +} + +static nir_shader * +anv_pipeline_compile(struct anv_pipeline *pipeline, + struct anv_shader_module *module, + const char *entrypoint, + gl_shader_stage stage, + const VkSpecializationInfo *spec_info, + struct brw_stage_prog_data *prog_data) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + + nir_shader *nir = anv_shader_compile_to_nir(pipeline->device, + module, entrypoint, stage, + spec_info); + if (nir == NULL) + return NULL; + + anv_nir_lower_push_constants(nir, compiler->scalar_stage[stage]); + + /* Figure out the number of parameters */ + prog_data->nr_params = 0; + + if (nir->num_uniforms > 0) { + /* If the shader uses any push constants at all, we'll just give + * them the maximum possible number + */ + prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float); + } + + if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets) + prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2; + + if (pipeline->bindings[stage].image_count > 0) + prog_data->nr_params += pipeline->bindings[stage].image_count * + BRW_IMAGE_PARAM_SIZE; + + if (prog_data->nr_params > 0) { + /* XXX: I think we're leaking this */ + prog_data->param = (const union gl_constant_value **) + malloc(prog_data->nr_params * sizeof(union gl_constant_value *)); + + /* We now set the param values to be offsets into a + * anv_push_constant_data structure. Since the compiler doesn't + * actually dereference any of the gl_constant_value pointers in the + * params array, it doesn't really matter what we put here. + */ + struct anv_push_constants *null_data = NULL; + if (nir->num_uniforms > 0) { + /* Fill out the push constants section of the param array */ + for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++) + prog_data->param[i] = (const union gl_constant_value *) + &null_data->client_data[i * sizeof(float)]; + } + } + + /* Set up dynamic offsets */ + anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data); + + char surface_usage_mask[256], sampler_usage_mask[256]; + zero(surface_usage_mask); + zero(sampler_usage_mask); + + /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ + if (pipeline->layout) + anv_nir_apply_pipeline_layout(pipeline, nir, prog_data); + + /* All binding table offsets provided by apply_pipeline_layout() are + * relative to the start of the bindint table (plus MAX_RTS for VS). 
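One easy-to-misread detail in anv_pipeline_compile above: the param pointers built from the NULL null_data base are never dereferenced by the compiler, they only encode byte offsets into struct anv_push_constants. A standalone sketch of what those values mean, using a stand-in struct and the portable offsetof() spelling rather than the NULL-base form the driver uses:

    #include <stddef.h>
    #include <stdio.h>

    struct example_push_constants {
       unsigned char client_data[128];   /* stand-in for the real layout */
    };

    int main(void)
    {
       /* Each "pointer" is really client_data's offset plus i * 4 bytes,
        * which is all the consumer of param[] ever looks at. */
       for (unsigned i = 0; i < 4; i++)
          printf("param[%u] encodes offset %zu\n", i,
                 offsetof(struct example_push_constants, client_data) +
                 i * sizeof(float));
       return 0;
    }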
+ */ + unsigned bias; + switch (stage) { + case MESA_SHADER_FRAGMENT: + bias = MAX_RTS; + break; + case MESA_SHADER_COMPUTE: + bias = 1; + break; + default: + bias = 0; + break; + } + prog_data->binding_table.size_bytes = 0; + prog_data->binding_table.texture_start = bias; + prog_data->binding_table.ubo_start = bias; + prog_data->binding_table.ssbo_start = bias; + prog_data->binding_table.image_start = bias; + + /* Finish the optimization and compilation process */ + if (nir->stage != MESA_SHADER_VERTEX && + nir->stage != MESA_SHADER_TESS_CTRL && + nir->stage != MESA_SHADER_TESS_EVAL && + nir->stage != MESA_SHADER_FRAGMENT) { + nir = brw_nir_lower_io(nir, &pipeline->device->info, + compiler->scalar_stage[stage], false, NULL); + } + + /* nir_lower_io will only handle the push constants; we need to set this + * to the full number of possible uniforms. + */ + nir->num_uniforms = prog_data->nr_params * 4; + + return nir; +} + +static void +anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, + gl_shader_stage stage, + struct brw_stage_prog_data *prog_data) +{ + struct brw_device_info *devinfo = &pipeline->device->info; + uint32_t max_threads[] = { + [MESA_SHADER_VERTEX] = devinfo->max_vs_threads, + [MESA_SHADER_TESS_CTRL] = 0, + [MESA_SHADER_TESS_EVAL] = 0, + [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads, + [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads, + [MESA_SHADER_COMPUTE] = devinfo->max_cs_threads, + }; + + pipeline->prog_data[stage] = prog_data; + pipeline->active_stages |= mesa_to_vk_shader_stage(stage); + pipeline->scratch_start[stage] = pipeline->total_scratch; + pipeline->total_scratch = + align_u32(pipeline->total_scratch, 1024) + + prog_data->total_scratch * max_threads[stage]; +} + +static VkResult +anv_pipeline_compile_vs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; + struct brw_vs_prog_key key; + uint32_t kernel; + unsigned char sha1[20], *hash; + + populate_vs_prog_key(&pipeline->device->info, &key); + + if (module->size > 0) { + hash = sha1; + anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, hash, prog_data); + } else { + hash = NULL; + } + + if (module->size == 0 || kernel == NO_KERNEL) { + memset(prog_data, 0, sizeof(*prog_data)); + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_VERTEX, spec_info, + &prog_data->base.base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + prog_data->inputs_read = nir->info.inputs_read; + if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) + pipeline->writes_point_size = true; + + brw_compute_vue_map(&pipeline->device->info, + &prog_data->base.vue_map, + nir->info.outputs_written, + nir->info.separate_shader); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_vs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, false, -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + kernel = anv_pipeline_cache_upload_kernel(cache, hash, + shader_code, code_size, + prog_data, 
sizeof(*prog_data)); + ralloc_free(mem_ctx); + } + + if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { + pipeline->vs_simd8 = kernel; + pipeline->vs_vec4 = NO_KERNEL; + } else { + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = kernel; + } + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, + &prog_data->base.base); + + return VK_SUCCESS; +} + +static VkResult +anv_pipeline_compile_gs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_gs_prog_data *prog_data = &pipeline->gs_prog_data; + struct brw_gs_prog_key key; + uint32_t kernel; + unsigned char sha1[20], *hash; + + populate_gs_prog_key(&pipeline->device->info, &key); + + if (module->size > 0) { + hash = sha1; + anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, hash, prog_data); + } else { + hash = NULL; + } + + if (module->size == 0 || kernel == NO_KERNEL) { + memset(prog_data, 0, sizeof(*prog_data)); + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_GEOMETRY, spec_info, + &prog_data->base.base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) + pipeline->writes_point_size = true; + + brw_compute_vue_map(&pipeline->device->info, + &prog_data->base.vue_map, + nir->info.outputs_written, + nir->info.separate_shader); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_gs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + /* TODO: SIMD8 GS */ + kernel = anv_pipeline_cache_upload_kernel(cache, hash, + shader_code, code_size, + prog_data, sizeof(*prog_data)); + + ralloc_free(mem_ctx); + } + + pipeline->gs_kernel = kernel; + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, + &prog_data->base.base); + + return VK_SUCCESS; +} + +static VkResult +anv_pipeline_compile_fs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; + struct brw_wm_prog_key key; + uint32_t kernel; + unsigned char sha1[20], *hash; + + populate_wm_prog_key(&pipeline->device->info, info, extra, &key); + + if (pipeline->use_repclear) + key.nr_color_regions = 1; + + if (module->size > 0) { + hash = sha1; + anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, hash, prog_data); + } else { + hash = NULL; + } + + if (module->size == 0 || kernel == NO_KERNEL) { + memset(prog_data, 0, sizeof(*prog_data)); + + prog_data->binding_table.render_target_start = 0; + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_FRAGMENT, spec_info, + &prog_data->base); + if (nir == 
NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + nir_function_impl *impl = nir_shader_get_entrypoint(nir)->impl; + nir_foreach_variable_safe(var, &nir->outputs) { + if (var->data.location < FRAG_RESULT_DATA0) + continue; + + unsigned rt = var->data.location - FRAG_RESULT_DATA0; + if (rt >= key.nr_color_regions) { + var->data.mode = nir_var_local; + exec_node_remove(&var->node); + exec_list_push_tail(&impl->locals, &var->node); + } + } + + void *mem_ctx = ralloc_context(NULL); + + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_fs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, -1, -1, pipeline->use_repclear, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + kernel = anv_pipeline_cache_upload_kernel(cache, hash, + shader_code, code_size, + prog_data, sizeof(*prog_data)); + + ralloc_free(mem_ctx); + } + + if (prog_data->no_8) + pipeline->ps_simd8 = NO_KERNEL; + else + pipeline->ps_simd8 = kernel; + + if (prog_data->no_8 || prog_data->prog_offset_16) { + pipeline->ps_simd16 = kernel + prog_data->prog_offset_16; + } else { + pipeline->ps_simd16 = NO_KERNEL; + } + + pipeline->ps_ksp2 = 0; + pipeline->ps_grf_start2 = 0; + if (pipeline->ps_simd8 != NO_KERNEL) { + pipeline->ps_ksp0 = pipeline->ps_simd8; + pipeline->ps_grf_start0 = prog_data->base.dispatch_grf_start_reg; + if (pipeline->ps_simd16 != NO_KERNEL) { + pipeline->ps_ksp2 = pipeline->ps_simd16; + pipeline->ps_grf_start2 = prog_data->dispatch_grf_start_reg_16; + } + } else if (pipeline->ps_simd16 != NO_KERNEL) { + pipeline->ps_ksp0 = pipeline->ps_simd16; + pipeline->ps_grf_start0 = prog_data->dispatch_grf_start_reg_16; + } + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, + &prog_data->base); + + return VK_SUCCESS; +} + +VkResult +anv_pipeline_compile_cs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct brw_cs_prog_key key; + uint32_t kernel; + unsigned char sha1[20], *hash; + + populate_cs_prog_key(&pipeline->device->info, &key); + + if (module->size > 0) { + hash = sha1; + anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); + kernel = anv_pipeline_cache_search(cache, hash, prog_data); + } else { + hash = NULL; + } + + if (module->size == 0 || kernel == NO_KERNEL) { + memset(prog_data, 0, sizeof(*prog_data)); + + prog_data->binding_table.work_groups_start = 0; + + nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + MESA_SHADER_COMPUTE, spec_info, + &prog_data->base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + prog_data->base.total_shared = nir->num_shared; + + void *mem_ctx = ralloc_context(NULL); + + if (module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_cs(compiler, NULL, mem_ctx, &key, prog_data, nir, + -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + kernel = anv_pipeline_cache_upload_kernel(cache, hash, + shader_code, code_size, + prog_data, sizeof(*prog_data)); + ralloc_free(mem_ctx); + } + + 
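Stepping back briefly to the fragment-shader bookkeeping a little further up: one cache upload can hold both a SIMD8 and a SIMD16 program, with the SIMD16 half starting prog_offset_16 bytes into the blob. A worked example with made-up offsets; EXAMPLE_NO_KERNEL stands in for the driver's NO_KERNEL:

    #include <stdint.h>
    #include <stdio.h>

    #define EXAMPLE_NO_KERNEL 1   /* stand-in for the driver's NO_KERNEL */

    int main(void)
    {
       uint32_t kernel = 0x1000;          /* offset returned by the cache */
       uint32_t prog_offset_16 = 0x600;   /* prog_data->prog_offset_16    */
       int no_8 = 0;                      /* a SIMD8 variant was compiled */

       uint32_t ps_simd8  = no_8 ? EXAMPLE_NO_KERNEL : kernel;
       uint32_t ps_simd16 = (no_8 || prog_offset_16)
                          ? kernel + prog_offset_16 : EXAMPLE_NO_KERNEL;

       printf("SIMD8  kernel start: 0x%x\n", ps_simd8);   /* 0x1000 */
       printf("SIMD16 kernel start: 0x%x\n", ps_simd16);  /* 0x1600 */
       return 0;
    }

The ps_ksp0/ps_ksp2 and grf_start fields above then just record whichever of the two starts is actually present.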
pipeline->cs_simd = kernel; + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, + &prog_data->base); + + return VK_SUCCESS; +} + +static const int gen8_push_size = 32 * 1024; + +static void +gen7_compute_urb_partition(struct anv_pipeline *pipeline) +{ + const struct brw_device_info *devinfo = &pipeline->device->info; + bool vs_present = pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT; + unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1; + unsigned vs_entry_size_bytes = vs_size * 64; + bool gs_present = pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT; + unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1; + unsigned gs_entry_size_bytes = gs_size * 64; + + /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): + * + * VS Number of URB Entries must be divisible by 8 if the VS URB Entry + * Allocation Size is less than 9 512-bit URB entries. + * + * Similar text exists for GS. + */ + unsigned vs_granularity = (vs_size < 9) ? 8 : 1; + unsigned gs_granularity = (gs_size < 9) ? 8 : 1; + + /* URB allocations must be done in 8k chunks. */ + unsigned chunk_size_bytes = 8192; + + /* Determine the size of the URB in chunks. */ + unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes; + + /* Reserve space for push constants */ + unsigned push_constant_bytes = gen8_push_size; + unsigned push_constant_chunks = + push_constant_bytes / chunk_size_bytes; + + /* Initially, assign each stage the minimum amount of URB space it needs, + * and make a note of how much additional space it "wants" (the amount of + * additional space it could actually make use of). + */ + + /* VS has a lower limit on the number of URB entries */ + unsigned vs_chunks = + ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + unsigned vs_wants = + ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - vs_chunks; + + unsigned gs_chunks = 0; + unsigned gs_wants = 0; + if (gs_present) { + /* There are two constraints on the minimum amount of URB space we can + * allocate: + * + * (1) We need room for at least 2 URB entries, since we always operate + * the GS in DUAL_OBJECT mode. + * + * (2) We can't allocate less than nr_gs_entries_granularity. + */ + gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + gs_wants = + ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - gs_chunks; + } + + /* There should always be enough URB space to satisfy the minimum + * requirements of each stage. + */ + unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks; + assert(total_needs <= urb_chunks); + + /* Mete out remaining space (if any) in proportion to "wants". */ + unsigned total_wants = vs_wants + gs_wants; + unsigned remaining_space = urb_chunks - total_needs; + if (remaining_space > total_wants) + remaining_space = total_wants; + if (remaining_space > 0) { + unsigned vs_additional = (unsigned) + round(vs_wants * (((double) remaining_space) / total_wants)); + vs_chunks += vs_additional; + remaining_space -= vs_additional; + gs_chunks += remaining_space; + } + + /* Sanity check that we haven't over-allocated. */ + assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks); + + /* Finally, compute the number of entries that can fit in the space + * allocated to each stage. 
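The chunk arithmetic in gen7_compute_urb_partition is easier to follow with concrete numbers. The snippet below replays the VS-only path with made-up device limits (128 KB of URB, 32 minimum and 640 maximum VS entries, a 2 x 64-byte VS entry, no GS); the real limits come from brw_device_info, and push constants take the 32 KB gen8_push_size defined above.

    #include <stdio.h>

    #define EXAMPLE_ALIGN(v, a) (((v) + (a) - 1) / (a) * (a))

    int main(void)
    {
       const unsigned chunk = 8192;                     /* 8 KB URB chunks     */
       const unsigned urb_chunks  = 128 * 1024 / chunk; /* 16                  */
       const unsigned push_chunks = 32 * 1024 / chunk;  /* 4 (gen8_push_size)  */
       const unsigned vs_entry_bytes = 2 * 64;          /* vs_size == 2        */

       unsigned vs_chunks = EXAMPLE_ALIGN(32 * vs_entry_bytes, chunk) / chunk;  /* 1 */
       unsigned vs_wants  = EXAMPLE_ALIGN(640 * vs_entry_bytes, chunk) / chunk
                          - vs_chunks;                                          /* 9 */

       unsigned remaining = urb_chunks - push_chunks - vs_chunks;  /* 11 left   */
       if (remaining > vs_wants)
          remaining = vs_wants;              /* capped by what the VS wants     */
       vs_chunks += remaining;               /* 10 chunks total for the VS      */

       printf("nr_vs_entries = %u\n", vs_chunks * chunk / vs_entry_bytes); /* 640 */
       return 0;
    }

With a GS present, the leftover chunks are instead split between VS and GS in proportion to each stage's "wants", as the rounding code above does.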
+ */ + unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes; + unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes; + + /* Since we rounded up when computing *_wants, this may be slightly more + * than the maximum allowed amount, so correct for that. + */ + nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries); + nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries); + + /* Ensure that we program a multiple of the granularity. */ + nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity); + nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity); + + /* Finally, sanity check to make sure we have at least the minimum number + * of entries needed for each stage. + */ + assert(nr_vs_entries >= devinfo->urb.min_vs_entries); + if (gs_present) + assert(nr_gs_entries >= 2); + + /* Lay out the URB in the following order: + * - push constants + * - VS + * - GS + */ + pipeline->urb.vs_start = push_constant_chunks; + pipeline->urb.vs_size = vs_size; + pipeline->urb.nr_vs_entries = nr_vs_entries; + + pipeline->urb.gs_start = push_constant_chunks + vs_chunks; + pipeline->urb.gs_size = gs_size; + pipeline->urb.nr_gs_entries = nr_gs_entries; +} + +static void +anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline, + const VkGraphicsPipelineCreateInfo *pCreateInfo) +{ + anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL; + ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass); + struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; + + pipeline->dynamic_state = default_dynamic_state; + + if (pCreateInfo->pDynamicState) { + /* Remove all of the states that are marked as dynamic */ + uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount; + for (uint32_t s = 0; s < count; s++) + states &= ~(1 << pCreateInfo->pDynamicState->pDynamicStates[s]); + } + + struct anv_dynamic_state *dynamic = &pipeline->dynamic_state; + + dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount; + if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { + typed_memcpy(dynamic->viewport.viewports, + pCreateInfo->pViewportState->pViewports, + pCreateInfo->pViewportState->viewportCount); + } + + dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount; + if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) { + typed_memcpy(dynamic->scissor.scissors, + pCreateInfo->pViewportState->pScissors, + pCreateInfo->pViewportState->scissorCount); + } + + if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) { + assert(pCreateInfo->pRasterizationState); + dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth; + } + + if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) { + assert(pCreateInfo->pRasterizationState); + dynamic->depth_bias.bias = + pCreateInfo->pRasterizationState->depthBiasConstantFactor; + dynamic->depth_bias.clamp = + pCreateInfo->pRasterizationState->depthBiasClamp; + dynamic->depth_bias.slope = + pCreateInfo->pRasterizationState->depthBiasSlopeFactor; + } + + if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) { + assert(pCreateInfo->pColorBlendState); + typed_memcpy(dynamic->blend_constants, + pCreateInfo->pColorBlendState->blendConstants, 4); + } + + /* If there is no depthstencil attachment, then don't read + * pDepthStencilState. The Vulkan spec states that pDepthStencilState may + * be NULL in this case. Even if pDepthStencilState is non-NULL, there is + * no need to override the depthstencil defaults in + * anv_pipeline::dynamic_state when there is no depthstencil attachment. 
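The states mask logic in anv_pipeline_init_dynamic_state above can be summarized with a small example: any state the application lists in pDynamicState has its bit cleared, so the typed_memcpy() of the baked value is skipped and the vkCmdSet* value recorded in the command buffer wins. A runnable sketch; ~0u stands in for ANV_CMD_DIRTY_DYNAMIC_ALL, which is private to the driver:

    #include <stdint.h>
    #include <stdio.h>
    #include <vulkan/vulkan.h>

    int main(void)
    {
       uint32_t states = ~0u;               /* everything baked by default */
       const VkDynamicState dynamic[] = {
          VK_DYNAMIC_STATE_VIEWPORT,
          VK_DYNAMIC_STATE_SCISSOR,
       };

       for (unsigned s = 0; s < 2; s++)
          states &= ~(1u << dynamic[s]);

       /* Bits 0 (VIEWPORT) and 1 (SCISSOR) are now clear: the pipeline keeps
        * no baked viewport/scissor and relies on vkCmdSetViewport() and
        * vkCmdSetScissor() at record time instead. */
       printf("baked-state mask: 0x%08x\n", states);
       return 0;
    }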
+ * + * From the Vulkan spec (20 Oct 2015, git-aa308cb): + * + * pDepthStencilState [...] may only be NULL if renderPass and subpass + * specify a subpass that has no depth/stencil attachment. + */ + if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { + if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->depth_bounds.min = + pCreateInfo->pDepthStencilState->minDepthBounds; + dynamic->depth_bounds.max = + pCreateInfo->pDepthStencilState->maxDepthBounds; + } + + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_compare_mask.front = + pCreateInfo->pDepthStencilState->front.compareMask; + dynamic->stencil_compare_mask.back = + pCreateInfo->pDepthStencilState->back.compareMask; + } + + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_write_mask.front = + pCreateInfo->pDepthStencilState->front.writeMask; + dynamic->stencil_write_mask.back = + pCreateInfo->pDepthStencilState->back.writeMask; + } + + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_reference.front = + pCreateInfo->pDepthStencilState->front.reference; + dynamic->stencil_reference.back = + pCreateInfo->pDepthStencilState->back.reference; + } + } + + pipeline->dynamic_state_mask = states; +} + +static void +anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info) +{ + struct anv_render_pass *renderpass = NULL; + struct anv_subpass *subpass = NULL; + + /* Assert that all required members of VkGraphicsPipelineCreateInfo are + * present, as explained by the Vulkan (20 Oct 2015, git-aa308cb), Section + * 4.2 Graphics Pipeline. 
+ */ + assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + renderpass = anv_render_pass_from_handle(info->renderPass); + assert(renderpass); + + if (renderpass != &anv_meta_dummy_renderpass) { + assert(info->subpass < renderpass->subpass_count); + subpass = &renderpass->subpasses[info->subpass]; + } + + assert(info->stageCount >= 1); + assert(info->pVertexInputState); + assert(info->pInputAssemblyState); + assert(info->pViewportState); + assert(info->pRasterizationState); + + if (subpass && subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) + assert(info->pDepthStencilState); + + if (subpass && subpass->color_count > 0) + assert(info->pColorBlendState); + + for (uint32_t i = 0; i < info->stageCount; ++i) { + switch (info->pStages[i].stage) { + case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: + case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: + assert(info->pTessellationState); + break; + default: + break; + } + } +} + +VkResult +anv_pipeline_init(struct anv_pipeline *pipeline, + struct anv_device *device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc) +{ + VkResult result; + + anv_validate { + anv_pipeline_validate_create_info(pCreateInfo); + } + + if (alloc == NULL) + alloc = &device->alloc; + + pipeline->device = device; + pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); + + result = anv_reloc_list_init(&pipeline->batch_relocs, alloc); + if (result != VK_SUCCESS) + return result; + + pipeline->batch.alloc = alloc; + pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + pipeline->batch.relocs = &pipeline->batch_relocs; + + anv_pipeline_init_dynamic_state(pipeline, pCreateInfo); + + if (pCreateInfo->pTessellationState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); + + pipeline->use_repclear = extra && extra->use_repclear; + pipeline->writes_point_size = false; + + /* When we free the pipeline, we detect stages based on the NULL status + * of various prog_data pointers. Make them NULL by default. 
+ */ + memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); + memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); + memset(pipeline->bindings, 0, sizeof(pipeline->bindings)); + + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = NO_KERNEL; + pipeline->gs_kernel = NO_KERNEL; + pipeline->ps_ksp0 = NO_KERNEL; + + pipeline->active_stages = 0; + pipeline->total_scratch = 0; + + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + ANV_FROM_HANDLE(anv_shader_module, module, + pCreateInfo->pStages[i].module); + + switch (pCreateInfo->pStages[i].stage) { + case VK_SHADER_STAGE_VERTEX_BIT: + anv_pipeline_compile_vs(pipeline, cache, pCreateInfo, module, + pCreateInfo->pStages[i].pName, + pCreateInfo->pStages[i].pSpecializationInfo); + break; + case VK_SHADER_STAGE_GEOMETRY_BIT: + anv_pipeline_compile_gs(pipeline, cache, pCreateInfo, module, + pCreateInfo->pStages[i].pName, + pCreateInfo->pStages[i].pSpecializationInfo); + break; + case VK_SHADER_STAGE_FRAGMENT_BIT: + anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, extra, module, + pCreateInfo->pStages[i].pName, + pCreateInfo->pStages[i].pSpecializationInfo); + break; + default: + anv_finishme("Unsupported shader stage"); + } + } + + if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) { + /* Vertex is only optional if disable_vs is set */ + assert(extra->disable_vs); + memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); + } + + gen7_compute_urb_partition(pipeline); + + const VkPipelineVertexInputStateCreateInfo *vi_info = + pCreateInfo->pVertexInputState; + + uint64_t inputs_read; + if (extra && extra->disable_vs) { + /* If the VS is disabled, just assume the user knows what they're + * doing and apply the layout blindly. This can only come from + * meta, so this *should* be safe. + */ + inputs_read = ~0ull; + } else { + inputs_read = pipeline->vs_prog_data.inputs_read; + } + + pipeline->vb_used = 0; + for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { + const VkVertexInputAttributeDescription *desc = + &vi_info->pVertexAttributeDescriptions[i]; + + if (inputs_read & (1 << (VERT_ATTRIB_GENERIC0 + desc->location))) + pipeline->vb_used |= 1 << desc->binding; + } + + for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { + const VkVertexInputBindingDescription *desc = + &vi_info->pVertexBindingDescriptions[i]; + + pipeline->binding_stride[desc->binding] = desc->stride; + + /* Step rate is programmed per vertex element (attribute), not + * binding. Set up a map of which bindings step per instance, for + * reference by vertex element setup. 
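The vb_used loop above only marks a vertex-buffer binding as live when the compiled VS actually reads an attribute sourced from it. A standalone sketch with two hypothetical attributes; EXAMPLE_GENERIC0 is a stand-in for VERT_ATTRIB_GENERIC0, and only the relative bit positions matter here:

    #include <stdint.h>
    #include <stdio.h>

    #define EXAMPLE_GENERIC0 17   /* stand-in for VERT_ATTRIB_GENERIC0 */

    int main(void)
    {
       struct { uint32_t location, binding; } attrs[] = {
          { 0, 0 },   /* position, fetched from binding 0 */
          { 1, 1 },   /* color,    fetched from binding 1 */
       };
       /* Pretend the vertex shader only reads location 0. */
       uint64_t inputs_read = 1ull << (EXAMPLE_GENERIC0 + 0);

       uint32_t vb_used = 0;
       for (unsigned i = 0; i < 2; i++) {
          if (inputs_read & (1ull << (EXAMPLE_GENERIC0 + attrs[i].location)))
             vb_used |= 1u << attrs[i].binding;
       }

       /* Only binding 0 is marked, so binding 1 never needs a vertex buffer. */
       printf("vb_used = 0x%x\n", vb_used);   /* 0x1 */
       return 0;
    }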
*/ + switch (desc->inputRate) { + default: + case VK_VERTEX_INPUT_RATE_VERTEX: + pipeline->instancing_enable[desc->binding] = false; + break; + case VK_VERTEX_INPUT_RATE_INSTANCE: + pipeline->instancing_enable[desc->binding] = true; + break; + } + } + + const VkPipelineInputAssemblyStateCreateInfo *ia_info = + pCreateInfo->pInputAssemblyState; + pipeline->primitive_restart = ia_info->primitiveRestartEnable; + pipeline->topology = vk_to_gen_primitive_type[ia_info->topology]; + + if (extra && extra->use_rectlist) + pipeline->topology = _3DPRIM_RECTLIST; + + while (anv_block_pool_size(&device->scratch_block_pool) < + pipeline->total_scratch) + anv_block_pool_alloc(&device->scratch_block_pool); + + return VK_SUCCESS; +} + +VkResult +anv_graphics_pipeline_create( + VkDevice _device, + VkPipelineCache _cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + + if (cache == NULL) + cache = &device->default_pipeline_cache; + + switch (device->info.gen) { + case 7: + if (device->info.is_haswell) + return gen75_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); + else + return gen7_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); + case 8: + return gen8_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); + case 9: + return gen9_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); + default: + unreachable("unsupported gen\n"); + } +} + +VkResult anv_CreateGraphicsPipelines( + VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkGraphicsPipelineCreateInfo* pCreateInfos, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipelines) +{ + VkResult result = VK_SUCCESS; + + unsigned i = 0; + for (; i < count; i++) { + result = anv_graphics_pipeline_create(_device, + pipelineCache, + &pCreateInfos[i], + NULL, pAllocator, &pPipelines[i]); + if (result != VK_SUCCESS) { + for (unsigned j = 0; j < i; j++) { + anv_DestroyPipeline(_device, pPipelines[j], pAllocator); + } + + return result; + } + } + + return VK_SUCCESS; +} + +static VkResult anv_compute_pipeline_create( + VkDevice _device, + VkPipelineCache _cache, + const VkComputePipelineCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + + if (cache == NULL) + cache = &device->default_pipeline_cache; + + switch (device->info.gen) { + case 7: + if (device->info.is_haswell) + return gen75_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); + else + return gen7_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); + case 8: + return gen8_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); + case 9: + return gen9_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); + default: + unreachable("unsupported gen\n"); + } +} + +VkResult anv_CreateComputePipelines( + VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkComputePipelineCreateInfo* pCreateInfos, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipelines) +{ + VkResult result = VK_SUCCESS; + + unsigned i = 0; + for (; i < count; i++) { + 
result = anv_compute_pipeline_create(_device, pipelineCache, + &pCreateInfos[i], + pAllocator, &pPipelines[i]); + if (result != VK_SUCCESS) { + for (unsigned j = 0; j < i; j++) { + anv_DestroyPipeline(_device, pPipelines[j], pAllocator); + } + + return result; + } + } + + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c new file mode 100644 index 0000000..c89bb2a --- /dev/null +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -0,0 +1,405 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/mesa-sha1.h" +#include "util/debug.h" +#include "anv_private.h" + +/* Remaining work: + * + * - Compact binding table layout so it's tight and not dependent on + * descriptor set layout. + * + * - Review prog_data struct for size and cacheability: struct + * brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8 + * bit quantities etc; param, pull_param, and image_params are pointers, we + * just need the compation map. use bit fields for all bools, eg + * dual_src_blend. + */ + +void +anv_pipeline_cache_init(struct anv_pipeline_cache *cache, + struct anv_device *device) +{ + cache->device = device; + anv_state_stream_init(&cache->program_stream, + &device->instruction_block_pool); + pthread_mutex_init(&cache->mutex, NULL); + + cache->kernel_count = 0; + cache->total_size = 0; + cache->table_size = 1024; + const size_t byte_size = cache->table_size * sizeof(cache->table[0]); + cache->table = malloc(byte_size); + + /* We don't consider allocation failure fatal, we just start with a 0-sized + * cache. 
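A small but important detail of anv_pipeline_cache_init above: the table is an open-addressed hash table whose size stays a power of two (so probes can mask instead of taking a modulo), and the 0xff memset is what makes every uint32_t slot read back as ~0, the empty marker the lookup below tests first. A trivial standalone check of that encoding:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
       /* Each table slot is a uint32_t offset into the program stream; the
        * all-ones pattern is reserved as "empty slot", which is exactly what
        * the probing loops below compare against. */
       uint32_t slot;
       memset(&slot, 0xff, sizeof(slot));
       assert(slot == ~0u);
       return 0;
    }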
*/ + if (cache->table == NULL) + cache->table_size = 0; + else + memset(cache->table, 0xff, byte_size); +} + +void +anv_pipeline_cache_finish(struct anv_pipeline_cache *cache) +{ + anv_state_stream_finish(&cache->program_stream); + pthread_mutex_destroy(&cache->mutex); + free(cache->table); +} + +struct cache_entry { + unsigned char sha1[20]; + uint32_t prog_data_size; + uint32_t kernel_size; + char prog_data[0]; + + /* kernel follows prog_data at next 64 byte aligned address */ +}; + +void +anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info) +{ + struct mesa_sha1 *ctx; + + ctx = _mesa_sha1_init(); + _mesa_sha1_update(ctx, &key, sizeof(key)); + _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1)); + _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint)); + /* hash in shader stage, pipeline layout? */ + if (spec_info) { + _mesa_sha1_update(ctx, spec_info->pMapEntries, + spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]); + _mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize); + } + _mesa_sha1_final(ctx, hash); +} + +uint32_t +anv_pipeline_cache_search(struct anv_pipeline_cache *cache, + const unsigned char *sha1, void *prog_data) +{ + const uint32_t mask = cache->table_size - 1; + const uint32_t start = (*(uint32_t *) sha1); + + for (uint32_t i = 0; i < cache->table_size; i++) { + const uint32_t index = (start + i) & mask; + const uint32_t offset = cache->table[index]; + + if (offset == ~0) + return NO_KERNEL; + + struct cache_entry *entry = + cache->program_stream.block_pool->map + offset; + if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) { + if (prog_data) + memcpy(prog_data, entry->prog_data, entry->prog_data_size); + + const uint32_t preamble_size = + align_u32(sizeof(*entry) + entry->prog_data_size, 64); + + return offset + preamble_size; + } + } + + return NO_KERNEL; +} + +static void +anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache, + struct cache_entry *entry, uint32_t entry_offset) +{ + const uint32_t mask = cache->table_size - 1; + const uint32_t start = (*(uint32_t *) entry->sha1); + + /* We'll always be able to insert when we get here. */ + assert(cache->kernel_count < cache->table_size / 2); + + for (uint32_t i = 0; i < cache->table_size; i++) { + const uint32_t index = (start + i) & mask; + if (cache->table[index] == ~0) { + cache->table[index] = entry_offset; + break; + } + } + + /* We don't include the alignment padding bytes when we serialize, so + * don't include taht in the the total size. 
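The preamble arithmetic shared by anv_pipeline_cache_search() above and anv_pipeline_cache_upload_kernel() below falls out of the in-memory layout of one cached entry: header and prog_data first, then padding to a 64-byte boundary, then the kernel. A small runnable restatement with a made-up prog_data size:

    #include <stdint.h>
    #include <stdio.h>

    #define EXAMPLE_ALIGN_U32(v, a) (((v) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
       uint32_t entry_header = 28;     /* 20-byte sha1 + two uint32_t counters */
       uint32_t prog_data_size = 164;  /* made-up prog_data size               */

       /* A cache hit returns entry_offset + this "preamble" size, which is
        * where the 64-byte-aligned kernel was placed at upload time. */
       uint32_t preamble = EXAMPLE_ALIGN_U32(entry_header + prog_data_size, 64);
       printf("kernel starts %u bytes after the entry\n", preamble);  /* 192 */
       return 0;
    }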
*/ + cache->total_size += + sizeof(*entry) + entry->prog_data_size + entry->kernel_size; + cache->kernel_count++; +} + +static VkResult +anv_pipeline_cache_grow(struct anv_pipeline_cache *cache) +{ + const uint32_t table_size = cache->table_size * 2; + const uint32_t old_table_size = cache->table_size; + const size_t byte_size = table_size * sizeof(cache->table[0]); + uint32_t *table; + uint32_t *old_table = cache->table; + + table = malloc(byte_size); + if (table == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + cache->table = table; + cache->table_size = table_size; + cache->kernel_count = 0; + cache->total_size = 0; + + memset(cache->table, 0xff, byte_size); + for (uint32_t i = 0; i < old_table_size; i++) { + const uint32_t offset = old_table[i]; + if (offset == ~0) + continue; + + struct cache_entry *entry = + cache->program_stream.block_pool->map + offset; + anv_pipeline_cache_add_entry(cache, entry, offset); + } + + free(old_table); + + return VK_SUCCESS; +} + +uint32_t +anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, + const unsigned char *sha1, + const void *kernel, size_t kernel_size, + const void *prog_data, size_t prog_data_size) +{ + pthread_mutex_lock(&cache->mutex); + struct cache_entry *entry; + + /* Meta pipelines don't have SPIR-V, so we can't hash them. + * Consequentally, they just don't get cached. + */ + const uint32_t preamble_size = sha1 ? + align_u32(sizeof(*entry) + prog_data_size, 64) : + 0; + + const uint32_t size = preamble_size + kernel_size; + + assert(size < cache->program_stream.block_pool->block_size); + const struct anv_state state = + anv_state_stream_alloc(&cache->program_stream, size, 64); + + if (sha1 && env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", false)) { + assert(anv_pipeline_cache_search(cache, sha1, NULL) == NO_KERNEL); + entry = state.map; + memcpy(entry->sha1, sha1, sizeof(entry->sha1)); + entry->prog_data_size = prog_data_size; + memcpy(entry->prog_data, prog_data, prog_data_size); + entry->kernel_size = kernel_size; + + if (cache->kernel_count == cache->table_size / 2) + anv_pipeline_cache_grow(cache); + + /* Failing to grow that hash table isn't fatal, but may mean we don't + * have enough space to add this new kernel. Only add it if there's room. + */ + if (cache->kernel_count < cache->table_size / 2) + anv_pipeline_cache_add_entry(cache, entry, state.offset); + } + + pthread_mutex_unlock(&cache->mutex); + + memcpy(state.map + preamble_size, kernel, kernel_size); + + if (!cache->device->info.has_llc) + anv_state_clflush(state); + + return state.offset + preamble_size; +} + +static void +anv_pipeline_cache_load(struct anv_pipeline_cache *cache, + const void *data, size_t size) +{ + struct anv_device *device = cache->device; + uint8_t uuid[VK_UUID_SIZE]; + struct { + uint32_t device_id; + uint8_t uuid[VK_UUID_SIZE]; + } header; + + if (size < sizeof(header)) + return; + memcpy(&header, data, sizeof(header)); + if (header.device_id != device->chipset_id) + return; + anv_device_get_cache_uuid(uuid); + if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0) + return; + + const void *end = data + size; + const void *p = data + sizeof(header); + + while (p < end) { + /* The kernels aren't 64 byte aligned in the serialized format so + * they're always right after the prog_data. 
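For reference, the blob that anv_pipeline_cache_load() parses here is the same one anv_GetPipelineCacheData() writes further down; restating the layout from the code (VK_UUID_SIZE is 16):

    uint32_t device_id          rejected unless it equals device->chipset_id
    uint8_t  uuid[16]           rejected unless it matches anv_device_get_cache_uuid()
    repeated until the end of the blob:
      struct cache_entry        sha1, prog_data_size, kernel_size
      prog_data bytes           prog_data_size of them
      kernel bytes              kernel_size of them, not 64-byte aligned on disk;
                                re-aligned when uploaded back into the program stream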
+ */ + const struct cache_entry *entry = p; + const void *kernel = &entry->prog_data[entry->prog_data_size]; + + anv_pipeline_cache_upload_kernel(cache, entry->sha1, + kernel, entry->kernel_size, + entry->prog_data, entry->prog_data_size); + p = kernel + entry->kernel_size; + } +} + +VkResult anv_CreatePipelineCache( + VkDevice _device, + const VkPipelineCacheCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipelineCache* pPipelineCache) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline_cache *cache; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + cache = anv_alloc2(&device->alloc, pAllocator, + sizeof(*cache), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (cache == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_pipeline_cache_init(cache, device); + + if (pCreateInfo->initialDataSize > 0) + anv_pipeline_cache_load(cache, + pCreateInfo->pInitialData, + pCreateInfo->initialDataSize); + + *pPipelineCache = anv_pipeline_cache_to_handle(cache); + + return VK_SUCCESS; +} + +void anv_DestroyPipelineCache( + VkDevice _device, + VkPipelineCache _cache, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + + anv_pipeline_cache_finish(cache); + + anv_free2(&device->alloc, pAllocator, cache); +} + +VkResult anv_GetPipelineCacheData( + VkDevice _device, + VkPipelineCache _cache, + size_t* pDataSize, + void* pData) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + + const size_t size = 4 + VK_UUID_SIZE + cache->total_size; + + if (pData == NULL) { + *pDataSize = size; + return VK_SUCCESS; + } + + if (*pDataSize < size) { + *pDataSize = 0; + return VK_INCOMPLETE; + } + + void *p = pData; + memcpy(p, &device->chipset_id, sizeof(device->chipset_id)); + p += sizeof(device->chipset_id); + + anv_device_get_cache_uuid(p); + p += VK_UUID_SIZE; + + struct cache_entry *entry; + for (uint32_t i = 0; i < cache->table_size; i++) { + if (cache->table[i] == ~0) + continue; + + entry = cache->program_stream.block_pool->map + cache->table[i]; + + memcpy(p, entry, sizeof(*entry) + entry->prog_data_size); + p += sizeof(*entry) + entry->prog_data_size; + + void *kernel = (void *) entry + + align_u32(sizeof(*entry) + entry->prog_data_size, 64); + + memcpy(p, kernel, entry->kernel_size); + p += entry->kernel_size; + } + + return VK_SUCCESS; +} + +static void +anv_pipeline_cache_merge(struct anv_pipeline_cache *dst, + struct anv_pipeline_cache *src) +{ + for (uint32_t i = 0; i < src->table_size; i++) { + if (src->table[i] == ~0) + continue; + + struct cache_entry *entry = + src->program_stream.block_pool->map + src->table[i]; + + if (anv_pipeline_cache_search(dst, entry->sha1, NULL) != NO_KERNEL) + continue; + + const void *kernel = (void *) entry + + align_u32(sizeof(*entry) + entry->prog_data_size, 64); + anv_pipeline_cache_upload_kernel(dst, entry->sha1, + kernel, entry->kernel_size, + entry->prog_data, entry->prog_data_size); + } +} + +VkResult anv_MergePipelineCaches( + VkDevice _device, + VkPipelineCache destCache, + uint32_t srcCacheCount, + const VkPipelineCache* pSrcCaches) +{ + ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache); + + for (uint32_t i = 0; i < srcCacheCount; i++) { + ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]); + + anv_pipeline_cache_merge(dst, src); + } + + return VK_SUCCESS; +} diff --git 
a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h new file mode 100644 index 0000000..ba86333 --- /dev/null +++ b/src/intel/vulkan/anv_private.h @@ -0,0 +1,1876 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_VALGRIND +#include +#include +#define VG(x) x +#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x)) +#else +#define VG(x) +#endif + +#include "brw_device_info.h" +#include "util/macros.h" +#include "util/list.h" + +/* Pre-declarations needed for WSI entrypoints */ +struct wl_surface; +struct wl_display; +typedef struct xcb_connection_t xcb_connection_t; +typedef uint32_t xcb_visualid_t; +typedef uint32_t xcb_window_t; + +#define VK_USE_PLATFORM_XCB_KHR +#define VK_USE_PLATFORM_WAYLAND_KHR + +#define VK_PROTOTYPES +#include +#include +#include + +#include "anv_entrypoints.h" +#include "anv_gen_macros.h" +#include "brw_context.h" +#include "isl/isl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_VBS 32 +#define MAX_SETS 8 +#define MAX_RTS 8 +#define MAX_VIEWPORTS 16 +#define MAX_SCISSORS 16 +#define MAX_PUSH_CONSTANTS_SIZE 128 +#define MAX_DYNAMIC_BUFFERS 16 +#define MAX_IMAGES 8 +#define MAX_SAMPLES_LOG2 4 /* SKL supports 16 samples */ + +#define anv_noreturn __attribute__((__noreturn__)) +#define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +static inline uint32_t +align_u32(uint32_t v, uint32_t a) +{ + assert(a != 0 && a == (a & -a)); + return (v + a - 1) & ~(a - 1); +} + +static inline uint64_t +align_u64(uint64_t v, uint64_t a) +{ + assert(a != 0 && a == (a & -a)); + return (v + a - 1) & ~(a - 1); +} + +static inline int32_t +align_i32(int32_t v, int32_t a) +{ + assert(a != 0 && a == (a & -a)); + return (v + a - 1) & ~(a - 1); +} + +/** Alignment must be a power of 2. 
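The align_u32()/align_u64()/align_i32() helpers above use the standard power-of-two rounding identity: adding (a - 1) carries v past the next boundary unless it is already on one, and the mask then clears the low bits. A couple of worked values, with the helper re-declared locally so the snippet stands alone:

    #include <assert.h>
    #include <stdint.h>

    /* Same body as align_u32() above: round v up to the next multiple of a,
     * where a must be a power of two. */
    static uint32_t
    example_align_u32(uint32_t v, uint32_t a)
    {
       return (v + a - 1) & ~(a - 1);
    }

    int main(void)
    {
       assert(example_align_u32(13, 8) == 16);
       assert(example_align_u32(16, 8) == 16);   /* already aligned: unchanged */
       assert(example_align_u32(0, 64) == 0);
       return 0;
    }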
*/ +static inline bool +anv_is_aligned(uintmax_t n, uintmax_t a) +{ + assert(a == (a & -a)); + return (n & (a - 1)) == 0; +} + +static inline uint32_t +anv_minify(uint32_t n, uint32_t levels) +{ + if (unlikely(n == 0)) + return 0; + else + return MAX(n >> levels, 1); +} + +static inline float +anv_clamp_f(float f, float min, float max) +{ + assert(min < max); + + if (f > max) + return max; + else if (f < min) + return min; + else + return f; +} + +static inline bool +anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask) +{ + if (*inout_mask & clear_mask) { + *inout_mask &= ~clear_mask; + return true; + } else { + return false; + } +} + +#define for_each_bit(b, dword) \ + for (uint32_t __dword = (dword); \ + (b) = __builtin_ffs(__dword) - 1, __dword; \ + __dword &= ~(1 << (b))) + +#define typed_memcpy(dest, src, count) ({ \ + static_assert(sizeof(*src) == sizeof(*dest), ""); \ + memcpy((dest), (src), (count) * sizeof(*(src))); \ +}) + +#define zero(x) (memset(&(x), 0, sizeof(x))) + +/* Define no kernel as 1, since that's an illegal offset for a kernel */ +#define NO_KERNEL 1 + +struct anv_common { + VkStructureType sType; + const void* pNext; +}; + +/* Whenever we generate an error, pass it through this function. Useful for + * debugging, where we can break on it. Only call at error site, not when + * propagating errors. Might be useful to plug in a stack trace here. + */ + +VkResult __vk_errorf(VkResult error, const char *file, int line, const char *format, ...); + +#ifdef DEBUG +#define vk_error(error) __vk_errorf(error, __FILE__, __LINE__, NULL); +#define vk_errorf(error, format, ...) __vk_errorf(error, __FILE__, __LINE__, format, ## __VA_ARGS__); +#else +#define vk_error(error) error +#define vk_errorf(error, format, ...) error +#endif + +void __anv_finishme(const char *file, int line, const char *format, ...) + anv_printflike(3, 4); +void anv_loge(const char *format, ...) anv_printflike(1, 2); +void anv_loge_v(const char *format, va_list va); + +/** + * Print a FINISHME message, including its source location. + */ +#define anv_finishme(format, ...) \ + __anv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); + +/* A non-fatal assert. Useful for debugging. */ +#ifdef DEBUG +#define anv_assert(x) ({ \ + if (unlikely(!(x))) \ + fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \ +}) +#else +#define anv_assert(x) +#endif + +/** + * If a block of code is annotated with anv_validate, then the block runs only + * in debug builds. + */ +#ifdef DEBUG +#define anv_validate if (1) +#else +#define anv_validate if (0) +#endif + +void anv_abortf(const char *format, ...) anv_noreturn anv_printflike(1, 2); +void anv_abortfv(const char *format, va_list va) anv_noreturn; + +#define stub_return(v) \ + do { \ + anv_finishme("stub %s", __func__); \ + return (v); \ + } while (0) + +#define stub() \ + do { \ + anv_finishme("stub %s", __func__); \ + return; \ + } while (0) + +/** + * A dynamically growable, circular buffer. Elements are added at head and + * removed from tail. head and tail are free-running uint32_t indices and we + * only compute the modulo with size when accessing the array. This way, + * number of bytes in the queue is always head - tail, even in case of + * wraparound. 
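The free-running head/tail convention described above is worth a worked example: both counters are plain uint32_t values that are allowed to wrap, so head - tail is always the number of queued bytes, and masking with (size - 1) turns either counter into an array slot (the masking only works because the backing size is a power of two). A standalone demonstration:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
       uint32_t size = 64;              /* bytes in the backing store */
       uint32_t tail = UINT32_MAX - 8;  /* about to wrap around       */
       uint32_t head = tail + 24;       /* 24 bytes currently queued  */

       printf("bytes queued: %u\n", head - tail);        /* 24            */
       printf("tail slot:    %u\n", tail & (size - 1));  /* 55            */
       printf("head slot:    %u\n", head & (size - 1));  /* 15, wrapped   */
       return 0;
    }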
+ */ + +struct anv_vector { + uint32_t head; + uint32_t tail; + uint32_t element_size; + uint32_t size; + void *data; +}; + +int anv_vector_init(struct anv_vector *queue, uint32_t element_size, uint32_t size); +void *anv_vector_add(struct anv_vector *queue); +void *anv_vector_remove(struct anv_vector *queue); + +static inline int +anv_vector_length(struct anv_vector *queue) +{ + return (queue->head - queue->tail) / queue->element_size; +} + +static inline void * +anv_vector_head(struct anv_vector *vector) +{ + assert(vector->tail < vector->head); + return (void *)((char *)vector->data + + ((vector->head - vector->element_size) & + (vector->size - 1))); +} + +static inline void * +anv_vector_tail(struct anv_vector *vector) +{ + return (void *)((char *)vector->data + (vector->tail & (vector->size - 1))); +} + +static inline void +anv_vector_finish(struct anv_vector *queue) +{ + free(queue->data); +} + +#define anv_vector_foreach(elem, queue) \ + static_assert(__builtin_types_compatible_p(__typeof__(queue), struct anv_vector *), ""); \ + for (uint32_t __anv_vector_offset = (queue)->tail; \ + elem = (queue)->data + (__anv_vector_offset & ((queue)->size - 1)), __anv_vector_offset < (queue)->head; \ + __anv_vector_offset += (queue)->element_size) + +struct anv_bo { + uint32_t gem_handle; + + /* Index into the current validation list. This is used by the + * validation list building alrogithm to track which buffers are already + * in the validation list so that we can ensure uniqueness. + */ + uint32_t index; + + /* Last known offset. This value is provided by the kernel when we + * execbuf and is used as the presumed offset for the next bunch of + * relocations. + */ + uint64_t offset; + + uint64_t size; + void *map; + + /* We need to set the WRITE flag on winsys bos so GEM will know we're + * writing to them and synchronize uses on other rings (eg if the display + * server uses the blitter ring). + */ + bool is_winsys_bo; +}; + +/* Represents a lock-free linked list of "free" things. This is used by + * both the block pool and the state pools. Unfortunately, in order to + * solve the ABA problem, we can't use a single uint32_t head. + */ +union anv_free_list { + struct { + int32_t offset; + + /* A simple count that is incremented every time the head changes. */ + uint32_t count; + }; + uint64_t u64; +}; + +#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { 1, 0 } }) + +struct anv_block_state { + union { + struct { + uint32_t next; + uint32_t end; + }; + uint64_t u64; + }; +}; + +struct anv_block_pool { + struct anv_device *device; + + struct anv_bo bo; + + /* The offset from the start of the bo to the "center" of the block + * pool. Pointers to allocated blocks are given by + * bo.map + center_bo_offset + offsets. + */ + uint32_t center_bo_offset; + + /* Current memory map of the block pool. This pointer may or may not + * point to the actual beginning of the block pool memory. If + * anv_block_pool_alloc_back has ever been called, then this pointer + * will point to the "center" position of the buffer and all offsets + * (negative or positive) given out by the block pool alloc functions + * will be valid relative to this pointer. + * + * In particular, map == bo.map + center_offset + */ + void *map; + int fd; + + /** + * Array of mmaps and gem handles owned by the block pool, reclaimed when + * the block pool is destroyed. 
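The free-running head/tail indices described in the anv_vector comment above deserve a standalone illustration: the distance head - tail stays correct in 32-bit arithmetic even after the counters wrap, and storage offsets come from masking with the power-of-two size. This sketch is plain demonstration code, not driver code:

#include <assert.h>
#include <stdint.h>

int main(void)
{
   const uint32_t size = 64;                 /* ring capacity in bytes, power of two */
   const uint32_t element_size = 16;

   uint32_t tail = UINT32_MAX - 8;           /* just below the 32-bit wrap point */
   uint32_t head = tail + 3 * element_size;  /* wraps past zero */

   assert(head < tail);                      /* the raw counters did wrap... */
   assert(head - tail == 3 * element_size);  /* ...but the distance is preserved */

   /* Offsets into the backing storage always use the masked value. */
   assert((head & (size - 1)) < size);
   assert((tail & (size - 1)) < size);

   return 0;
}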
+ */ + struct anv_vector mmap_cleanups; + + uint32_t block_size; + + union anv_free_list free_list; + struct anv_block_state state; + + union anv_free_list back_free_list; + struct anv_block_state back_state; +}; + +/* Block pools are backed by a fixed-size 2GB memfd */ +#define BLOCK_POOL_MEMFD_SIZE (1ull << 32) + +/* The center of the block pool is also the middle of the memfd. This may + * change in the future if we decide differently for some reason. + */ +#define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2) + +static inline uint32_t +anv_block_pool_size(struct anv_block_pool *pool) +{ + return pool->state.end + pool->back_state.end; +} + +struct anv_state { + int32_t offset; + uint32_t alloc_size; + void *map; +}; + +struct anv_fixed_size_state_pool { + size_t state_size; + union anv_free_list free_list; + struct anv_block_state block; +}; + +#define ANV_MIN_STATE_SIZE_LOG2 6 +#define ANV_MAX_STATE_SIZE_LOG2 10 + +#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2) + +struct anv_state_pool { + struct anv_block_pool *block_pool; + struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS]; +}; + +struct anv_state_stream_block; + +struct anv_state_stream { + struct anv_block_pool *block_pool; + + /* The current working block */ + struct anv_state_stream_block *block; + + /* Offset at which the current block starts */ + uint32_t start; + /* Offset at which to allocate the next state */ + uint32_t next; + /* Offset at which the current block ends */ + uint32_t end; +}; + +#define CACHELINE_SIZE 64 +#define CACHELINE_MASK 63 + +static inline void +anv_clflush_range(void *start, size_t size) +{ + void *p = (void *) (((uintptr_t) start) & ~CACHELINE_MASK); + void *end = start + size; + + __builtin_ia32_mfence(); + while (p < end) { + __builtin_ia32_clflush(p); + p += CACHELINE_SIZE; + } +} + +static void inline +anv_state_clflush(struct anv_state state) +{ + anv_clflush_range(state.map, state.alloc_size); +} + +void anv_block_pool_init(struct anv_block_pool *pool, + struct anv_device *device, uint32_t block_size); +void anv_block_pool_finish(struct anv_block_pool *pool); +int32_t anv_block_pool_alloc(struct anv_block_pool *pool); +int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool); +void anv_block_pool_free(struct anv_block_pool *pool, int32_t offset); +void anv_state_pool_init(struct anv_state_pool *pool, + struct anv_block_pool *block_pool); +void anv_state_pool_finish(struct anv_state_pool *pool); +struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool, + size_t state_size, size_t alignment); +void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state); +void anv_state_stream_init(struct anv_state_stream *stream, + struct anv_block_pool *block_pool); +void anv_state_stream_finish(struct anv_state_stream *stream); +struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream, + uint32_t size, uint32_t alignment); + +/** + * Implements a pool of re-usable BOs. The interface is identical to that + * of block_pool except that each block is its own BO. 
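Given the invariant spelled out above (pool->map already points center_bo_offset bytes into the BO, so the offsets handed out by the front and back allocators are signed and relative to that center), resolving an allocation to a CPU pointer is a one-liner; the helper below is an illustrative assumption, not driver code:

#include "anv_private.h"

/* Blocks from anv_block_pool_alloc() have offset >= 0, blocks from
 * anv_block_pool_alloc_back() have offset < 0; both are relative to the
 * "center" that pool->map points at.
 */
static void *
example_block_pointer(struct anv_block_pool *pool, int32_t block_offset)
{
   return (char *)pool->map + block_offset;
}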
+ */ +struct anv_bo_pool { + struct anv_device *device; + + uint32_t bo_size; + + void *free_list; +}; + +void anv_bo_pool_init(struct anv_bo_pool *pool, + struct anv_device *device, uint32_t block_size); +void anv_bo_pool_finish(struct anv_bo_pool *pool); +VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo); +void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo); + + +void *anv_resolve_entrypoint(uint32_t index); + +extern struct anv_dispatch_table dtable; + +#define ANV_CALL(func) ({ \ + if (dtable.func == NULL) { \ + size_t idx = offsetof(struct anv_dispatch_table, func) / sizeof(void *); \ + dtable.entrypoints[idx] = anv_resolve_entrypoint(idx); \ + } \ + dtable.func; \ +}) + +static inline void * +anv_alloc(const VkAllocationCallbacks *alloc, + size_t size, size_t align, + VkSystemAllocationScope scope) +{ + return alloc->pfnAllocation(alloc->pUserData, size, align, scope); +} + +static inline void * +anv_realloc(const VkAllocationCallbacks *alloc, + void *ptr, size_t size, size_t align, + VkSystemAllocationScope scope) +{ + return alloc->pfnReallocation(alloc->pUserData, ptr, size, align, scope); +} + +static inline void +anv_free(const VkAllocationCallbacks *alloc, void *data) +{ + alloc->pfnFree(alloc->pUserData, data); +} + +static inline void * +anv_alloc2(const VkAllocationCallbacks *parent_alloc, + const VkAllocationCallbacks *alloc, + size_t size, size_t align, + VkSystemAllocationScope scope) +{ + if (alloc) + return anv_alloc(alloc, size, align, scope); + else + return anv_alloc(parent_alloc, size, align, scope); +} + +static inline void +anv_free2(const VkAllocationCallbacks *parent_alloc, + const VkAllocationCallbacks *alloc, + void *data) +{ + if (alloc) + anv_free(alloc, data); + else + anv_free(parent_alloc, data); +} + +struct anv_physical_device { + VK_LOADER_DATA _loader_data; + + struct anv_instance * instance; + uint32_t chipset_id; + const char * path; + const char * name; + const struct brw_device_info * info; + uint64_t aperture_size; + struct brw_compiler * compiler; + struct isl_device isl_dev; +}; + +struct anv_wsi_interaface; + +#define VK_ICD_WSI_PLATFORM_MAX 5 + +struct anv_instance { + VK_LOADER_DATA _loader_data; + + VkAllocationCallbacks alloc; + + uint32_t apiVersion; + int physicalDeviceCount; + struct anv_physical_device physicalDevice; + + struct anv_wsi_interface * wsi[VK_ICD_WSI_PLATFORM_MAX]; +}; + +VkResult anv_init_wsi(struct anv_instance *instance); +void anv_finish_wsi(struct anv_instance *instance); + +struct anv_meta_state { + VkAllocationCallbacks alloc; + + /** + * Use array element `i` for images with `2^i` samples. + */ + struct { + /** + * Pipeline N is used to clear color attachment N of the current + * subpass. + * + * HACK: We use one pipeline per color attachment to work around the + * compiler's inability to dynamically set the render target index of + * the render target write message. + */ + struct anv_pipeline *color_pipelines[MAX_RTS]; + + struct anv_pipeline *depth_only_pipeline; + struct anv_pipeline *stencil_only_pipeline; + struct anv_pipeline *depthstencil_pipeline; + } clear[1 + MAX_SAMPLES_LOG2]; + + struct { + VkRenderPass render_pass; + + /** Pipeline that blits from a 1D image. */ + VkPipeline pipeline_1d_src; + + /** Pipeline that blits from a 2D image. */ + VkPipeline pipeline_2d_src; + + /** Pipeline that blits from a 3D image. 
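anv_alloc2()/anv_free2() above encode the Vulkan allocator rule that a per-call allocator, when given, overrides the parent instance/device allocator. A minimal sketch of the pattern as an object constructor/destructor pair (the example_* functions are hypothetical):

#include "anv_private.h"

static VkResult
example_create_fence(struct anv_device *device,
                     const VkAllocationCallbacks *pAllocator, /* may be NULL */
                     struct anv_fence **fence_out)
{
   struct anv_fence *fence =
      anv_alloc2(&device->alloc, pAllocator, sizeof(*fence), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (fence == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   *fence_out = fence;
   return VK_SUCCESS;
}

static void
example_destroy_fence(struct anv_device *device,
                      const VkAllocationCallbacks *pAllocator,
                      struct anv_fence *fence)
{
   /* Must pass the same allocator pair that was used for the allocation. */
   anv_free2(&device->alloc, pAllocator, fence);
}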
*/ + VkPipeline pipeline_3d_src; + + VkPipelineLayout pipeline_layout; + VkDescriptorSetLayout ds_layout; + } blit; + + struct { + /** Pipeline [i] resolves an image with 2^(i+1) samples. */ + VkPipeline pipelines[MAX_SAMPLES_LOG2]; + + VkRenderPass pass; + VkPipelineLayout pipeline_layout; + VkDescriptorSetLayout ds_layout; + } resolve; +}; + +struct anv_queue { + VK_LOADER_DATA _loader_data; + + struct anv_device * device; + + struct anv_state_pool * pool; +}; + +struct anv_pipeline_cache { + struct anv_device * device; + struct anv_state_stream program_stream; + pthread_mutex_t mutex; + + uint32_t total_size; + uint32_t table_size; + uint32_t kernel_count; + uint32_t *table; +}; + +void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, + struct anv_device *device); +void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache); +uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache, + const unsigned char *sha1, void *prog_data); +uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, + const unsigned char *sha1, + const void *kernel, + size_t kernel_size, + const void *prog_data, + size_t prog_data_size); + +struct anv_device { + VK_LOADER_DATA _loader_data; + + VkAllocationCallbacks alloc; + + struct anv_instance * instance; + uint32_t chipset_id; + struct brw_device_info info; + struct isl_device isl_dev; + int context_id; + int fd; + + struct anv_bo_pool batch_bo_pool; + + struct anv_block_pool dynamic_state_block_pool; + struct anv_state_pool dynamic_state_pool; + + struct anv_block_pool instruction_block_pool; + struct anv_pipeline_cache default_pipeline_cache; + + struct anv_block_pool surface_state_block_pool; + struct anv_state_pool surface_state_pool; + + struct anv_bo workaround_bo; + + struct anv_meta_state meta_state; + + struct anv_state border_colors; + + struct anv_queue queue; + + struct anv_block_pool scratch_block_pool; + + pthread_mutex_t mutex; +}; + +VkResult gen7_init_device_state(struct anv_device *device); +VkResult gen75_init_device_state(struct anv_device *device); +VkResult gen8_init_device_state(struct anv_device *device); +VkResult gen9_init_device_state(struct anv_device *device); + +void anv_device_get_cache_uuid(void *uuid); + + +void* anv_gem_mmap(struct anv_device *device, + uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags); +void anv_gem_munmap(void *p, uint64_t size); +uint32_t anv_gem_create(struct anv_device *device, size_t size); +void anv_gem_close(struct anv_device *device, uint32_t gem_handle); +uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size); +int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns); +int anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf); +int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle, + uint32_t stride, uint32_t tiling); +int anv_gem_create_context(struct anv_device *device); +int anv_gem_destroy_context(struct anv_device *device, int context); +int anv_gem_get_param(int fd, uint32_t param); +bool anv_gem_get_bit6_swizzle(int fd, uint32_t tiling); +int anv_gem_get_aperture(int fd, uint64_t *size); +int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle); +uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd); +int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching); +int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, + uint32_t read_domains, uint32_t 
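A rough sketch of how the pipeline-cache interface above is meant to be driven: hash everything that affects compilation into a SHA-1 and look the kernel up. The assumption here, based on the declarations alone, is that a hit fills in prog_data and a miss returns NO_KERNEL, after which the caller compiles the shader and adds it with anv_pipeline_cache_upload_kernel(); the helper name is hypothetical:

#include "anv_private.h"

static uint32_t
example_lookup_kernel(struct anv_pipeline_cache *cache,
                      const void *key, size_t key_size,
                      struct anv_shader_module *module,
                      const VkSpecializationInfo *spec_info,
                      struct brw_wm_prog_data *prog_data)
{
   unsigned char sha1[20];

   /* Key the cache on the shader module, entrypoint and specialization. */
   anv_hash_shader(sha1, key, key_size, module, "main", spec_info);

   /* NO_KERNEL means the caller still has to compile the shader and insert
    * it; otherwise this is the kernel's offset in the cache's program stream.
    */
   return anv_pipeline_cache_search(cache, sha1, prog_data);
}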
write_domain); + +VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size); + +struct anv_reloc_list { + size_t num_relocs; + size_t array_length; + struct drm_i915_gem_relocation_entry * relocs; + struct anv_bo ** reloc_bos; +}; + +VkResult anv_reloc_list_init(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc); +void anv_reloc_list_finish(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc); + +uint64_t anv_reloc_list_add(struct anv_reloc_list *list, + const VkAllocationCallbacks *alloc, + uint32_t offset, struct anv_bo *target_bo, + uint32_t delta); + +struct anv_batch_bo { + /* Link in the anv_cmd_buffer.owned_batch_bos list */ + struct list_head link; + + struct anv_bo bo; + + /* Bytes actually consumed in this batch BO */ + size_t length; + + /* Last seen surface state block pool bo offset */ + uint32_t last_ss_pool_bo_offset; + + struct anv_reloc_list relocs; +}; + +struct anv_batch { + const VkAllocationCallbacks * alloc; + + void * start; + void * end; + void * next; + + struct anv_reloc_list * relocs; + + /* This callback is called (with the associated user data) in the event + * that the batch runs out of space. + */ + VkResult (*extend_cb)(struct anv_batch *, void *); + void * user_data; +}; + +void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords); +void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other); +uint64_t anv_batch_emit_reloc(struct anv_batch *batch, + void *location, struct anv_bo *bo, uint32_t offset); +VkResult anv_device_submit_simple_batch(struct anv_device *device, + struct anv_batch *batch); + +struct anv_address { + struct anv_bo *bo; + uint32_t offset; +}; + +#define __gen_address_type struct anv_address +#define __gen_user_data struct anv_batch + +static inline uint64_t +__gen_combine_address(struct anv_batch *batch, void *location, + const struct anv_address address, uint32_t delta) +{ + if (address.bo == NULL) { + return address.offset + delta; + } else { + assert(batch->start <= location && location < batch->end); + + return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta); + } +} + +/* Wrapper macros needed to work around preprocessor argument issues. In + * particular, arguments don't get pre-evaluated if they are concatenated. + * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the + * GENX macro won't get evaluated if the emit macro contains "cmd ## foo". + * We can work around this easily enough with these helpers. + */ +#define __anv_cmd_length(cmd) cmd ## _length +#define __anv_cmd_length_bias(cmd) cmd ## _length_bias +#define __anv_cmd_header(cmd) cmd ## _header +#define __anv_cmd_pack(cmd) cmd ## _pack + +#define anv_batch_emit(batch, cmd, ...) do { \ + void *__dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \ + struct cmd __template = { \ + __anv_cmd_header(cmd), \ + __VA_ARGS__ \ + }; \ + __anv_cmd_pack(cmd)(batch, __dst, &__template); \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(__dst, __anv_cmd_length(cmd) * 4)); \ + } while (0) + +#define anv_batch_emitn(batch, n, cmd, ...) 
({ \ + void *__dst = anv_batch_emit_dwords(batch, n); \ + struct cmd __template = { \ + __anv_cmd_header(cmd), \ + .DWordLength = n - __anv_cmd_length_bias(cmd), \ + __VA_ARGS__ \ + }; \ + __anv_cmd_pack(cmd)(batch, __dst, &__template); \ + __dst; \ + }) + +#define anv_batch_emit_merge(batch, dwords0, dwords1) \ + do { \ + uint32_t *dw; \ + \ + static_assert(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1), "mismatch merge"); \ + dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0)); \ + for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++) \ + dw[i] = (dwords0)[i] | (dwords1)[i]; \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\ + } while (0) + +#define anv_state_pool_emit(pool, cmd, align, ...) ({ \ + const uint32_t __size = __anv_cmd_length(cmd) * 4; \ + struct anv_state __state = \ + anv_state_pool_alloc((pool), __size, align); \ + struct cmd __template = { \ + __VA_ARGS__ \ + }; \ + __anv_cmd_pack(cmd)(NULL, __state.map, &__template); \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(__state.map, __anv_cmd_length(cmd) * 4)); \ + if (!(pool)->block_pool->device->info.has_llc) \ + anv_state_clflush(__state); \ + __state; \ + }) + +#define GEN7_MOCS (struct GEN7_MEMORY_OBJECT_CONTROL_STATE) { \ + .GraphicsDataTypeGFDT = 0, \ + .LLCCacheabilityControlLLCCC = 0, \ + .L3CacheabilityControlL3CC = 1, \ +} + +#define GEN75_MOCS (struct GEN75_MEMORY_OBJECT_CONTROL_STATE) { \ + .LLCeLLCCacheabilityControlLLCCC = 0, \ + .L3CacheabilityControlL3CC = 1, \ +} + +#define GEN8_MOCS { \ + .MemoryTypeLLCeLLCCacheabilityControl = WB, \ + .TargetCache = L3DefertoPATforLLCeLLCselection, \ + .AgeforQUADLRU = 0 \ + } + +/* Skylake: MOCS is now an index into an array of 62 different caching + * configurations programmed by the kernel. + */ + +#define GEN9_MOCS { \ + /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ + .IndextoMOCSTables = 2 \ + } + +#define GEN9_MOCS_PTE { \ + /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ + .IndextoMOCSTables = 1 \ + } + +struct anv_device_memory { + struct anv_bo bo; + uint32_t type_index; + VkDeviceSize map_size; + void * map; +}; + +/** + * Header for Vertex URB Entry (VUE) + */ +struct anv_vue_header { + uint32_t Reserved; + uint32_t RTAIndex; /* RenderTargetArrayIndex */ + uint32_t ViewportIndex; + float PointWidth; +}; + +struct anv_descriptor_set_binding_layout { + /* Number of array elements in this binding */ + uint16_t array_size; + + /* Index into the flattend descriptor set */ + uint16_t descriptor_index; + + /* Index into the dynamic state array for a dynamic buffer */ + int16_t dynamic_offset_index; + + /* Index into the descriptor set buffer views */ + int16_t buffer_index; + + struct { + /* Index into the binding table for the associated surface */ + int16_t surface_index; + + /* Index into the sampler table for the associated sampler */ + int16_t sampler_index; + + /* Index into the image table for the associated image */ + int16_t image_index; + } stage[MESA_SHADER_STAGES]; + + /* Immutable samplers (or NULL if no immutable samplers) */ + struct anv_sampler **immutable_samplers; +}; + +struct anv_descriptor_set_layout { + /* Number of bindings in this descriptor set */ + uint16_t binding_count; + + /* Total size of the descriptor set with room for all array entries */ + uint16_t size; + + /* Shader stages affected by this descriptor set */ + uint16_t shader_stages; + + /* Number of buffers in this descriptor set */ + uint16_t buffer_count; + + /* Number of dynamic offsets used by this descriptor set */ + uint16_t dynamic_offset_count; + + /* Bindings in this 
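The wrapper-macro comment above describes the classic two-level expansion trick; the standalone sketch below uses stand-in names (GENX, GEN8_3DSTATE_PS_length, demo_*) to show why the indirection matters:

/* Stand-ins for illustration only. */
#define GENX(cmd)               GEN8_##cmd
#define GEN8_3DSTATE_PS_length  12

/* Mirrors __anv_cmd_length(): pastes its argument with _length. */
#define demo_cmd_length(cmd)    cmd ## _length

/* Calling demo_cmd_length(GENX(3DSTATE_PS)) directly fails to compile: an
 * argument used as an operand of ## is not macro-expanded, so the paste
 * glues ')' onto '_length'. Going through one more macro first lets GENX()
 * expand to GEN8_3DSTATE_PS, and only then pastes.
 */
#define demo_emit_length(cmd)   demo_cmd_length(cmd)

enum { demo_length = demo_emit_length(GENX(3DSTATE_PS)) };   /* == 12 */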
descriptor set */ + struct anv_descriptor_set_binding_layout binding[0]; +}; + +struct anv_descriptor { + VkDescriptorType type; + + union { + struct { + struct anv_image_view *image_view; + struct anv_sampler *sampler; + }; + + struct anv_buffer_view *buffer_view; + }; +}; + +struct anv_descriptor_set { + const struct anv_descriptor_set_layout *layout; + uint32_t buffer_count; + struct anv_buffer_view *buffer_views; + struct anv_descriptor descriptors[0]; +}; + +VkResult +anv_descriptor_set_create(struct anv_device *device, + const struct anv_descriptor_set_layout *layout, + struct anv_descriptor_set **out_set); + +void +anv_descriptor_set_destroy(struct anv_device *device, + struct anv_descriptor_set *set); + +struct anv_pipeline_binding { + /* The descriptor set this surface corresponds to */ + uint16_t set; + + /* Offset into the descriptor set */ + uint16_t offset; +}; + +struct anv_pipeline_layout { + struct { + struct anv_descriptor_set_layout *layout; + uint32_t dynamic_offset_start; + } set[MAX_SETS]; + + uint32_t num_sets; + + struct { + bool has_dynamic_offsets; + } stage[MESA_SHADER_STAGES]; +}; + +struct anv_buffer { + struct anv_device * device; + VkDeviceSize size; + + VkBufferUsageFlags usage; + + /* Set when bound */ + struct anv_bo * bo; + VkDeviceSize offset; +}; + +enum anv_cmd_dirty_bits { + ANV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1 << 0, /* VK_DYNAMIC_STATE_VIEWPORT */ + ANV_CMD_DIRTY_DYNAMIC_SCISSOR = 1 << 1, /* VK_DYNAMIC_STATE_SCISSOR */ + ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1 << 2, /* VK_DYNAMIC_STATE_LINE_WIDTH */ + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1 << 3, /* VK_DYNAMIC_STATE_DEPTH_BIAS */ + ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1 << 4, /* VK_DYNAMIC_STATE_BLEND_CONSTANTS */ + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1 << 5, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS */ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, /* VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK */ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, /* VK_DYNAMIC_STATE_STENCIL_WRITE_MASK */ + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8, /* VK_DYNAMIC_STATE_STENCIL_REFERENCE */ + ANV_CMD_DIRTY_DYNAMIC_ALL = (1 << 9) - 1, + ANV_CMD_DIRTY_PIPELINE = 1 << 9, + ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 10, + ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 11, +}; +typedef uint32_t anv_cmd_dirty_mask_t; + +struct anv_vertex_binding { + struct anv_buffer * buffer; + VkDeviceSize offset; +}; + +struct anv_push_constants { + /* Current allocated size of this push constants data structure. + * Because a decent chunk of it may not be used (images on SKL, for + * instance), we won't actually allocate the entire structure up-front. + */ + uint32_t size; + + /* Push constant data provided by the client through vkPushConstants */ + uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE]; + + /* Our hardware only provides zero-based vertex and instance id so, in + * order to satisfy the vulkan requirements, we may have to push one or + * both of these into the shader. 
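Because anv_descriptor_set_layout ends in the flexible binding[0] array above, allocating one has to add a binding-layout entry per binding; a minimal sizing sketch (hypothetical helper, device allocator only):

#include "anv_private.h"

static struct anv_descriptor_set_layout *
example_alloc_set_layout(struct anv_device *device, uint32_t binding_count)
{
   size_t size = sizeof(struct anv_descriptor_set_layout) +
                 binding_count * sizeof(struct anv_descriptor_set_binding_layout);

   return anv_alloc2(&device->alloc, NULL, size, 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
}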
+ */ + uint32_t base_vertex; + uint32_t base_instance; + + /* Offsets and ranges for dynamically bound buffers */ + struct { + uint32_t offset; + uint32_t range; + } dynamic[MAX_DYNAMIC_BUFFERS]; + + /* Image data for image_load_store on pre-SKL */ + struct brw_image_param images[MAX_IMAGES]; +}; + +struct anv_dynamic_state { + struct { + uint32_t count; + VkViewport viewports[MAX_VIEWPORTS]; + } viewport; + + struct { + uint32_t count; + VkRect2D scissors[MAX_SCISSORS]; + } scissor; + + float line_width; + + struct { + float bias; + float clamp; + float slope; + } depth_bias; + + float blend_constants[4]; + + struct { + float min; + float max; + } depth_bounds; + + struct { + uint32_t front; + uint32_t back; + } stencil_compare_mask; + + struct { + uint32_t front; + uint32_t back; + } stencil_write_mask; + + struct { + uint32_t front; + uint32_t back; + } stencil_reference; +}; + +extern const struct anv_dynamic_state default_dynamic_state; + +void anv_dynamic_state_copy(struct anv_dynamic_state *dest, + const struct anv_dynamic_state *src, + uint32_t copy_mask); + +/** + * Attachment state when recording a renderpass instance. + * + * The clear value is valid only if there exists a pending clear. + */ +struct anv_attachment_state { + VkImageAspectFlags pending_clear_aspects; + VkClearValue clear_value; +}; + +/** State required while building cmd buffer */ +struct anv_cmd_state { + /* PIPELINE_SELECT.PipelineSelection */ + uint32_t current_pipeline; + uint32_t current_l3_config; + uint32_t vb_dirty; + anv_cmd_dirty_mask_t dirty; + anv_cmd_dirty_mask_t compute_dirty; + uint32_t num_workgroups_offset; + struct anv_bo *num_workgroups_bo; + VkShaderStageFlags descriptors_dirty; + VkShaderStageFlags push_constants_dirty; + uint32_t scratch_size; + struct anv_pipeline * pipeline; + struct anv_pipeline * compute_pipeline; + struct anv_framebuffer * framebuffer; + struct anv_render_pass * pass; + struct anv_subpass * subpass; + uint32_t restart_index; + struct anv_vertex_binding vertex_bindings[MAX_VBS]; + struct anv_descriptor_set * descriptors[MAX_SETS]; + struct anv_push_constants * push_constants[MESA_SHADER_STAGES]; + struct anv_state binding_tables[MESA_SHADER_STAGES]; + struct anv_state samplers[MESA_SHADER_STAGES]; + struct anv_dynamic_state dynamic; + bool need_query_wa; + + /** + * Array length is anv_cmd_state::pass::attachment_count. Array content is + * valid only when recording a render pass instance. + */ + struct anv_attachment_state * attachments; + + struct { + struct anv_buffer * index_buffer; + uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */ + uint32_t index_offset; + } gen7; +}; + +struct anv_cmd_pool { + VkAllocationCallbacks alloc; + struct list_head cmd_buffers; +}; + +#define ANV_CMD_BUFFER_BATCH_SIZE 8192 + +enum anv_cmd_buffer_exec_mode { + ANV_CMD_BUFFER_EXEC_MODE_PRIMARY, + ANV_CMD_BUFFER_EXEC_MODE_EMIT, + ANV_CMD_BUFFER_EXEC_MODE_CHAIN, + ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN, +}; + +struct anv_cmd_buffer { + VK_LOADER_DATA _loader_data; + + struct anv_device * device; + + struct anv_cmd_pool * pool; + struct list_head pool_link; + + struct anv_batch batch; + + /* Fields required for the actual chain of anv_batch_bo's. + * + * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain(). 
+ */ + struct list_head batch_bos; + enum anv_cmd_buffer_exec_mode exec_mode; + + /* A vector of anv_batch_bo pointers for every batch or surface buffer + * referenced by this command buffer + * + * initialized by anv_cmd_buffer_init_batch_bo_chain() + */ + struct anv_vector seen_bbos; + + /* A vector of int32_t's for every block of binding tables. + * + * initialized by anv_cmd_buffer_init_batch_bo_chain() + */ + struct anv_vector bt_blocks; + uint32_t bt_next; + struct anv_reloc_list surface_relocs; + + /* Information needed for execbuf + * + * These fields are generated by anv_cmd_buffer_prepare_execbuf(). + */ + struct { + struct drm_i915_gem_execbuffer2 execbuf; + + struct drm_i915_gem_exec_object2 * objects; + uint32_t bo_count; + struct anv_bo ** bos; + + /* Allocated length of the 'objects' and 'bos' arrays */ + uint32_t array_length; + + bool need_reloc; + } execbuf2; + + /* Serial for tracking buffer completion */ + uint32_t serial; + + /* Stream objects for storing temporary data */ + struct anv_state_stream surface_state_stream; + struct anv_state_stream dynamic_state_stream; + + VkCommandBufferUsageFlags usage_flags; + VkCommandBufferLevel level; + + struct anv_cmd_state state; +}; + +VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, + struct anv_cmd_buffer *secondary); +void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer); + +VkResult anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *bt_state); +VkResult anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *state); +uint32_t gen7_cmd_buffer_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer); +void gen7_cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer, + uint32_t stages); + +struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, + const void *data, uint32_t size, uint32_t alignment); +struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t *b, + uint32_t dwords, uint32_t alignment); + +struct anv_address +anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer); +struct anv_state +anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, + uint32_t entries, uint32_t *state_offset); +struct anv_state +anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer); +struct anv_state +anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment); + +VkResult +anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer); + +void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer); +void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer); + +void gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); +void gen75_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); +void gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); +void gen9_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); + +void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); + +void 
anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer, + const VkRenderPassBeginInfo *info); + +void gen7_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); +void gen75_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); +void gen8_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); +void gen9_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); +void anv_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); + +void gen7_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); +void gen75_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); +void gen8_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); +void gen9_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); + +void gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); +void gen75_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); +void gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); +void gen9_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); + +void gen7_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); +void gen75_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); +void gen8_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); +void gen9_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); + +struct anv_state +anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage); +struct anv_state +anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer); + +void anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer); + +const struct anv_image_view * +anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer); + +void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); + +struct anv_fence { + struct anv_bo bo; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec2_objects[1]; + bool ready; +}; + +struct anv_event { + uint64_t semaphore; + struct anv_state state; +}; + +struct nir_shader; + +struct anv_shader_module { + struct nir_shader * nir; + + unsigned char sha1[20]; + uint32_t size; + char data[0]; +}; + +void anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info); + +static inline gl_shader_stage +vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage) +{ + assert(__builtin_popcount(vk_stage) == 1); + return ffs(vk_stage) - 1; +} + +static inline VkShaderStageFlagBits +mesa_to_vk_shader_stage(gl_shader_stage mesa_stage) +{ + return (1 << mesa_stage); +} + +#define ANV_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1) + +#define anv_foreach_stage(stage, stage_bits) \ + for (gl_shader_stage stage, \ + __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK); \ + stage = __builtin_ffs(__tmp) - 1, __tmp; \ + __tmp &= ~(1 << (stage))) + +struct anv_pipeline_bind_map { + uint32_t surface_count; + uint32_t sampler_count; + uint32_t image_count; + + struct anv_pipeline_binding * surface_to_descriptor; + struct anv_pipeline_binding * sampler_to_descriptor; +}; + +struct anv_pipeline { + struct anv_device * device; + struct anv_batch batch; + uint32_t batch_data[512]; + struct anv_reloc_list batch_relocs; + uint32_t 
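The stage helpers above map between VkShaderStageFlagBits and gl_shader_stage (a flag bit's position is its mesa stage index) and anv_foreach_stage() walks the set bits; a small usage sketch:

#include <assert.h>
#include "anv_private.h"

static void
example_walk_stages(void)
{
   VkShaderStageFlags dirty =
      VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;

   /* Visits MESA_SHADER_VERTEX, then MESA_SHADER_FRAGMENT. */
   anv_foreach_stage(s, dirty) {
      assert(mesa_to_vk_shader_stage(s) & dirty);
   }
}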
dynamic_state_mask; + struct anv_dynamic_state dynamic_state; + + struct anv_pipeline_layout * layout; + struct anv_pipeline_bind_map bindings[MESA_SHADER_STAGES]; + + bool use_repclear; + + struct brw_vs_prog_data vs_prog_data; + struct brw_wm_prog_data wm_prog_data; + struct brw_gs_prog_data gs_prog_data; + struct brw_cs_prog_data cs_prog_data; + bool writes_point_size; + struct brw_stage_prog_data * prog_data[MESA_SHADER_STAGES]; + uint32_t scratch_start[MESA_SHADER_STAGES]; + uint32_t total_scratch; + struct { + uint32_t vs_start; + uint32_t vs_size; + uint32_t nr_vs_entries; + uint32_t gs_start; + uint32_t gs_size; + uint32_t nr_gs_entries; + } urb; + + VkShaderStageFlags active_stages; + struct anv_state blend_state; + uint32_t vs_simd8; + uint32_t vs_vec4; + uint32_t ps_simd8; + uint32_t ps_simd16; + uint32_t ps_ksp0; + uint32_t ps_ksp2; + uint32_t ps_grf_start0; + uint32_t ps_grf_start2; + uint32_t gs_kernel; + uint32_t cs_simd; + + uint32_t vb_used; + uint32_t binding_stride[MAX_VBS]; + bool instancing_enable[MAX_VBS]; + bool primitive_restart; + uint32_t topology; + + uint32_t cs_thread_width_max; + uint32_t cs_right_mask; + + struct { + uint32_t sf[7]; + uint32_t depth_stencil_state[3]; + } gen7; + + struct { + uint32_t sf[4]; + uint32_t raster[5]; + uint32_t wm_depth_stencil[3]; + } gen8; + + struct { + uint32_t wm_depth_stencil[4]; + } gen9; +}; + +struct anv_graphics_pipeline_create_info { + /** + * If non-negative, overrides the color attachment count of the pipeline's + * subpass. + */ + int8_t color_attachment_count; + + bool use_repclear; + bool disable_viewport; + bool disable_scissor; + bool disable_vs; + bool use_rectlist; +}; + +VkResult +anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc); + +VkResult +anv_pipeline_compile_cs(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *info, + struct anv_shader_module *module, + const char *entrypoint, + const VkSpecializationInfo *spec_info); + +VkResult +anv_graphics_pipeline_create(VkDevice device, + VkPipelineCache cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); + +VkResult +gen7_graphics_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); + +VkResult +gen75_graphics_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); + +VkResult +gen8_graphics_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); +VkResult +gen9_graphics_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); +VkResult 
+gen7_compute_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); +VkResult +gen75_compute_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); + +VkResult +gen8_compute_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); +VkResult +gen9_compute_pipeline_create(VkDevice _device, + struct anv_pipeline_cache *cache, + const VkComputePipelineCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + VkPipeline *pPipeline); + +struct anv_format_swizzle { + unsigned r:2; + unsigned g:2; + unsigned b:2; + unsigned a:2; +}; + +struct anv_format { + const VkFormat vk_format; + const char *name; + enum isl_format isl_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ + const struct isl_format_layout *isl_layout; + struct anv_format_swizzle swizzle; + bool has_depth; + bool has_stencil; +}; + +const struct anv_format * +anv_format_for_vk_format(VkFormat format); + +enum isl_format +anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect, + VkImageTiling tiling, struct anv_format_swizzle *swizzle); + +static inline bool +anv_format_is_color(const struct anv_format *format) +{ + return !format->has_depth && !format->has_stencil; +} + +static inline bool +anv_format_is_depth_or_stencil(const struct anv_format *format) +{ + return format->has_depth || format->has_stencil; +} + +/** + * Subsurface of an anv_image. + */ +struct anv_surface { + struct isl_surf isl; + + /** + * Offset from VkImage's base address, as bound by vkBindImageMemory(). + */ + uint32_t offset; +}; + +struct anv_image { + VkImageType type; + /* The original VkFormat provided by the client. This may not match any + * of the actual surface formats. + */ + VkFormat vk_format; + const struct anv_format *format; + VkExtent3D extent; + uint32_t levels; + uint32_t array_size; + uint32_t samples; /**< VkImageCreateInfo::samples */ + VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */ + VkImageTiling tiling; /** VkImageCreateInfo::tiling */ + + VkDeviceSize size; + uint32_t alignment; + + /* Set when bound */ + struct anv_bo *bo; + VkDeviceSize offset; + + /** + * Image subsurfaces + * + * For each foo, anv_image::foo_surface is valid if and only if + * anv_image::format has a foo aspect. + * + * The hardware requires that the depth buffer and stencil buffer be + * separate surfaces. From Vulkan's perspective, though, depth and stencil + * reside in the same VkImage. To satisfy both the hardware and Vulkan, we + * allocate the depth and stencil buffers as separate surfaces in the same + * bo. + */ + union { + struct anv_surface color_surface; + + struct { + struct anv_surface depth_surface; + struct anv_surface stencil_surface; + }; + }; +}; + +struct anv_image_view { + const struct anv_image *image; /**< VkImageViewCreateInfo::image */ + struct anv_bo *bo; + uint32_t offset; /**< Offset into bo. */ + + VkImageAspectFlags aspect_mask; + VkFormat vk_format; + VkComponentMapping swizzle; + enum isl_format format; + uint32_t base_layer; + uint32_t base_mip; + VkExtent3D level_0_extent; /**< Extent of ::image's level 0 adjusted for ::vk_format. */ + VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. 
*/ + + /** RENDER_SURFACE_STATE when using image as a color render target. */ + struct anv_state color_rt_surface_state; + + /** RENDER_SURFACE_STATE when using image as a sampler surface. */ + struct anv_state sampler_surface_state; + + /** RENDER_SURFACE_STATE when using image as a storage image. */ + struct anv_state storage_surface_state; +}; + +struct anv_image_create_info { + const VkImageCreateInfo *vk_info; + isl_tiling_flags_t isl_tiling_flags; + uint32_t stride; +}; + +VkResult anv_image_create(VkDevice _device, + const struct anv_image_create_info *info, + const VkAllocationCallbacks* alloc, + VkImage *pImage); + +struct anv_surface * +anv_image_get_surface_for_aspect_mask(struct anv_image *image, + VkImageAspectFlags aspect_mask); + +void anv_image_view_init(struct anv_image_view *view, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer, + uint32_t offset); + +void +anv_fill_image_surface_state(struct anv_device *device, struct anv_state state, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); +void +gen7_fill_image_surface_state(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); +void +gen75_fill_image_surface_state(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); +void +gen8_fill_image_surface_state(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); +void +gen9_fill_image_surface_state(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage); + +struct anv_buffer_view { + enum isl_format format; /**< VkBufferViewCreateInfo::format */ + struct anv_bo *bo; + uint32_t offset; /**< Offset into bo. 
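The union at the end of anv_image (one color surface, or separate depth and stencil surfaces living in the same BO) is what anv_image_get_surface_for_aspect_mask() above has to resolve; a simplified single-aspect sketch of that selection, not the real implementation:

#include "anv_private.h"

static struct anv_surface *
example_surface_for_aspect(struct anv_image *image, VkImageAspectFlags aspect)
{
   switch (aspect) {
   case VK_IMAGE_ASPECT_COLOR_BIT:
      assert(anv_format_is_color(image->format));
      return &image->color_surface;
   case VK_IMAGE_ASPECT_DEPTH_BIT:
      assert(image->format->has_depth);
      return &image->depth_surface;
   case VK_IMAGE_ASPECT_STENCIL_BIT:
      assert(image->format->has_stencil);
      return &image->stencil_surface;
   default:
      unreachable("unsupported aspect mask");
   }
}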
*/ + uint64_t range; /**< VkBufferViewCreateInfo::range */ + + struct anv_state surface_state; + struct anv_state storage_surface_state; +}; + +const struct anv_format * +anv_format_for_descriptor_type(VkDescriptorType type); + +void anv_fill_buffer_surface_state(struct anv_device *device, + struct anv_state state, + enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride); + +void gen7_fill_buffer_surface_state(void *state, enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride); +void gen75_fill_buffer_surface_state(void *state, enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride); +void gen8_fill_buffer_surface_state(void *state, enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride); +void gen9_fill_buffer_surface_state(void *state, enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride); + +void anv_image_view_fill_image_param(struct anv_device *device, + struct anv_image_view *view, + struct brw_image_param *param); +void anv_buffer_view_fill_image_param(struct anv_device *device, + struct anv_buffer_view *view, + struct brw_image_param *param); + +struct anv_sampler { + uint32_t state[4]; +}; + +struct anv_framebuffer { + uint32_t width; + uint32_t height; + uint32_t layers; + + uint32_t attachment_count; + struct anv_image_view * attachments[0]; +}; + +struct anv_subpass { + uint32_t input_count; + uint32_t * input_attachments; + uint32_t color_count; + uint32_t * color_attachments; + uint32_t * resolve_attachments; + uint32_t depth_stencil_attachment; + + /** Subpass has at least one resolve attachment */ + bool has_resolve; +}; + +struct anv_render_pass_attachment { + const struct anv_format *format; + uint32_t samples; + VkAttachmentLoadOp load_op; + VkAttachmentLoadOp stencil_load_op; +}; + +struct anv_render_pass { + uint32_t attachment_count; + uint32_t subpass_count; + uint32_t * subpass_attachments; + struct anv_render_pass_attachment * attachments; + struct anv_subpass subpasses[0]; +}; + +extern struct anv_render_pass anv_meta_dummy_renderpass; + +struct anv_query_pool_slot { + uint64_t begin; + uint64_t end; + uint64_t available; +}; + +struct anv_query_pool { + VkQueryType type; + uint32_t slots; + struct anv_bo bo; +}; + +VkResult anv_device_init_meta(struct anv_device *device); +void anv_device_finish_meta(struct anv_device *device); + +void *anv_lookup_entrypoint(const char *name); + +void anv_dump_image_to_ppm(struct anv_device *device, + struct anv_image *image, unsigned miplevel, + unsigned array_layer, const char *filename); + +#define ANV_DEFINE_HANDLE_CASTS(__anv_type, __VkType) \ + \ + static inline struct __anv_type * \ + __anv_type ## _from_handle(__VkType _handle) \ + { \ + return (struct __anv_type *) _handle; \ + } \ + \ + static inline __VkType \ + __anv_type ## _to_handle(struct __anv_type *_obj) \ + { \ + return (__VkType) _obj; \ + } + +#define ANV_DEFINE_NONDISP_HANDLE_CASTS(__anv_type, __VkType) \ + \ + static inline struct __anv_type * \ + __anv_type ## _from_handle(__VkType _handle) \ + { \ + return (struct __anv_type *)(uintptr_t) _handle; \ + } \ + \ + static inline __VkType \ + __anv_type ## _to_handle(struct __anv_type *_obj) \ + { \ + return (__VkType)(uintptr_t) _obj; \ + } + +#define ANV_FROM_HANDLE(__anv_type, __name, __handle) \ + struct __anv_type *__name = __anv_type ## _from_handle(__handle) + +ANV_DEFINE_HANDLE_CASTS(anv_cmd_buffer, VkCommandBuffer) +ANV_DEFINE_HANDLE_CASTS(anv_device, VkDevice) 
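The cast macros above simply reinterpret Vulkan handles as driver pointers: dispatchable handles (VkDevice, VkQueue, ...) are pointers, while non-dispatchable ones round-trip through uintptr_t because they are 64-bit integers regardless of pointer size. A usage sketch:

#include <assert.h>
#include "anv_private.h"

static void
example_handle_casts(VkDevice _device, VkBuffer _buffer)
{
   /* Expands to: struct anv_device *device = anv_device_from_handle(_device); */
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);

   /* Converting back when handing an object to the application: */
   VkBuffer handle = anv_buffer_to_handle(buffer);
   assert(handle == _buffer);
   (void)device;
}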
+ANV_DEFINE_HANDLE_CASTS(anv_instance, VkInstance) +ANV_DEFINE_HANDLE_CASTS(anv_physical_device, VkPhysicalDevice) +ANV_DEFINE_HANDLE_CASTS(anv_queue, VkQueue) + +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, VkCommandPool) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, VkBuffer) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, VkBufferView) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, VkDeviceMemory) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, VkFence) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_event, VkEvent) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, VkFramebuffer) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image, VkImage) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, VkImageView); +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_cache, VkPipelineCache) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, VkPipeline) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, VkPipelineLayout) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, VkQueryPool) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_render_pass, VkRenderPass) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, VkSampler) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_shader_module, VkShaderModule) + +#define ANV_DEFINE_STRUCT_CASTS(__anv_type, __VkType) \ + \ + static inline const __VkType * \ + __anv_type ## _to_ ## __VkType(const struct __anv_type *__anv_obj) \ + { \ + return (const __VkType *) __anv_obj; \ + } + +#define ANV_COMMON_TO_STRUCT(__VkType, __vk_name, __common_name) \ + const __VkType *__vk_name = anv_common_to_ ## __VkType(__common_name) + +ANV_DEFINE_STRUCT_CASTS(anv_common, VkMemoryBarrier) +ANV_DEFINE_STRUCT_CASTS(anv_common, VkBufferMemoryBarrier) +ANV_DEFINE_STRUCT_CASTS(anv_common, VkImageMemoryBarrier) + +#ifdef __cplusplus +} +#endif diff --git a/src/intel/vulkan/anv_query.c b/src/intel/vulkan/anv_query.c new file mode 100644 index 0000000..e45b519 --- /dev/null +++ b/src/intel/vulkan/anv_query.c @@ -0,0 +1,187 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "anv_private.h"
+
+VkResult anv_CreateQueryPool(
+    VkDevice                                    _device,
+    const VkQueryPoolCreateInfo*                pCreateInfo,
+    const VkAllocationCallbacks*                pAllocator,
+    VkQueryPool*                                pQueryPool)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   struct anv_query_pool *pool;
+   VkResult result;
+   uint32_t slot_size;
+   uint64_t size;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO);
+
+   switch (pCreateInfo->queryType) {
+   case VK_QUERY_TYPE_OCCLUSION:
+   case VK_QUERY_TYPE_TIMESTAMP:
+      break;
+   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+      return VK_ERROR_INCOMPATIBLE_DRIVER;
+   default:
+      assert(!"Invalid query type");
+   }
+
+   slot_size = sizeof(struct anv_query_pool_slot);
+   pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
+                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (pool == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   pool->type = pCreateInfo->queryType;
+   pool->slots = pCreateInfo->queryCount;
+
+   size = pCreateInfo->queryCount * slot_size;
+   result = anv_bo_init_new(&pool->bo, device, size);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size, 0);
+
+   *pQueryPool = anv_query_pool_to_handle(pool);
+
+   return VK_SUCCESS;
+
+ fail:
+   anv_free2(&device->alloc, pAllocator, pool);
+
+   return result;
+}
+
+void anv_DestroyQueryPool(
+    VkDevice                                    _device,
+    VkQueryPool                                 _pool,
+    const VkAllocationCallbacks*                pAllocator)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_query_pool, pool, _pool);
+
+   anv_gem_munmap(pool->bo.map, pool->bo.size);
+   anv_gem_close(device, pool->bo.gem_handle);
+   anv_free2(&device->alloc, pAllocator, pool);
+}
+
+VkResult anv_GetQueryPoolResults(
+    VkDevice                                    _device,
+    VkQueryPool                                 queryPool,
+    uint32_t                                    firstQuery,
+    uint32_t                                    queryCount,
+    size_t                                      dataSize,
+    void*                                       pData,
+    VkDeviceSize                                stride,
+    VkQueryResultFlags                          flags)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
+   int64_t timeout = INT64_MAX;
+   uint64_t result;
+   int ret;
+
+   assert(pool->type == VK_QUERY_TYPE_OCCLUSION ||
+          pool->type == VK_QUERY_TYPE_TIMESTAMP);
+
+   if (pData == NULL)
+      return VK_SUCCESS;
+
+   if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+      ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout);
+      if (ret == -1) {
+         /* We don't know the real error.
*/ + return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "gem_wait failed %m"); + } + } + + void *data_end = pData + dataSize; + struct anv_query_pool_slot *slot = pool->bo.map; + + for (uint32_t i = 0; i < queryCount; i++) { + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: { + result = slot[firstQuery + i].end - slot[firstQuery + i].begin; + break; + } + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + unreachable("pipeline stats not supported"); + case VK_QUERY_TYPE_TIMESTAMP: { + result = slot[firstQuery + i].begin; + break; + } + default: + unreachable("invalid pool type"); + } + + if (flags & VK_QUERY_RESULT_64_BIT) { + uint64_t *dst = pData; + dst[0] = result; + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) + dst[1] = slot[firstQuery + i].available; + } else { + uint32_t *dst = pData; + if (result > UINT32_MAX) + result = UINT32_MAX; + dst[0] = result; + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) + dst[1] = slot[firstQuery + i].available; + } + + pData += stride; + if (pData >= data_end) + break; + } + + return VK_SUCCESS; +} + +void anv_CmdResetQueryPool( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount) +{ + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + for (uint32_t i = 0; i < queryCount; i++) { + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + case VK_QUERY_TYPE_TIMESTAMP: { + struct anv_query_pool_slot *slot = pool->bo.map; + slot[firstQuery + i].available = 0; + break; + } + default: + assert(!"Invalid query type"); + } + } +} diff --git a/src/intel/vulkan/anv_util.c b/src/intel/vulkan/anv_util.c new file mode 100644 index 0000000..22fd01c --- /dev/null +++ b/src/intel/vulkan/anv_util.c @@ -0,0 +1,195 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "anv_private.h" + +/** Log an error message. */ +void anv_printflike(1, 2) +anv_loge(const char *format, ...) +{ + va_list va; + + va_start(va, format); + anv_loge_v(format, va); + va_end(va); +} + +/** \see anv_loge() */ +void +anv_loge_v(const char *format, va_list va) +{ + fprintf(stderr, "vk: error: "); + vfprintf(stderr, format, va); + fprintf(stderr, "\n"); +} + +void anv_printflike(3, 4) +__anv_finishme(const char *file, int line, const char *format, ...) 
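From the application's point of view, the packing loop above writes one 32- or 64-bit result per query every `stride` bytes, followed by an availability word when VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set; a host-side sketch of reading two occlusion queries that way:

#include <stdint.h>
#include <vulkan/vulkan.h>

static void
example_read_results(VkDevice device, VkQueryPool pool)
{
   /* One row per query: { result, available }, both 64-bit here. */
   uint64_t data[2][2];

   VkResult result =
      vkGetQueryPoolResults(device, pool,
                            0 /* firstQuery */, 2 /* queryCount */,
                            sizeof(data), data,
                            sizeof(data[0]) /* stride */,
                            VK_QUERY_RESULT_64_BIT |
                            VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);

   uint64_t samples_passed = data[0][0];   /* end - begin for occlusion queries */
   uint64_t available      = data[0][1];
   (void)result; (void)samples_passed; (void)available;
}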
+{ + va_list ap; + char buffer[256]; + + va_start(ap, format); + vsnprintf(buffer, sizeof(buffer), format, ap); + va_end(ap); + + fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer); +} + +void anv_noreturn anv_printflike(1, 2) +anv_abortf(const char *format, ...) +{ + va_list va; + + va_start(va, format); + anv_abortfv(format, va); + va_end(va); +} + +void anv_noreturn +anv_abortfv(const char *format, va_list va) +{ + fprintf(stderr, "vk: error: "); + vfprintf(stderr, format, va); + fprintf(stderr, "\n"); + abort(); +} + +VkResult +__vk_errorf(VkResult error, const char *file, int line, const char *format, ...) +{ + va_list ap; + char buffer[256]; + +#define ERROR_CASE(error) case error: error_str = #error; break; + + const char *error_str; + switch ((int32_t)error) { + + /* Core errors */ + ERROR_CASE(VK_ERROR_OUT_OF_HOST_MEMORY) + ERROR_CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY) + ERROR_CASE(VK_ERROR_INITIALIZATION_FAILED) + ERROR_CASE(VK_ERROR_DEVICE_LOST) + ERROR_CASE(VK_ERROR_MEMORY_MAP_FAILED) + ERROR_CASE(VK_ERROR_LAYER_NOT_PRESENT) + ERROR_CASE(VK_ERROR_EXTENSION_NOT_PRESENT) + ERROR_CASE(VK_ERROR_INCOMPATIBLE_DRIVER) + + /* Extension errors */ + ERROR_CASE(VK_ERROR_OUT_OF_DATE_KHR) + + default: + assert(!"Unknown error"); + error_str = "unknown error"; + } + +#undef ERROR_CASE + + if (format) { + va_start(ap, format); + vsnprintf(buffer, sizeof(buffer), format, ap); + va_end(ap); + + fprintf(stderr, "%s:%d: %s (%s)\n", file, line, buffer, error_str); + } else { + fprintf(stderr, "%s:%d: %s\n", file, line, error_str); + } + + return error; +} + +int +anv_vector_init(struct anv_vector *vector, uint32_t element_size, uint32_t size) +{ + assert(util_is_power_of_two(size)); + assert(element_size < size && util_is_power_of_two(element_size)); + + vector->head = 0; + vector->tail = 0; + vector->element_size = element_size; + vector->size = size; + vector->data = malloc(size); + + return vector->data != NULL; +} + +void * +anv_vector_add(struct anv_vector *vector) +{ + uint32_t offset, size, split, tail; + void *data; + + if (vector->head - vector->tail == vector->size) { + size = vector->size * 2; + data = malloc(size); + if (data == NULL) + return NULL; + split = align_u32(vector->tail, vector->size); + tail = vector->tail & (vector->size - 1); + if (vector->head - split < vector->size) { + memcpy(data + tail, + vector->data + tail, + split - vector->tail); + memcpy(data + vector->size, + vector->data, vector->head - split); + } else { + memcpy(data + tail, + vector->data + tail, + vector->head - vector->tail); + } + free(vector->data); + vector->data = data; + vector->size = size; + } + + assert(vector->head - vector->tail < vector->size); + + offset = vector->head & (vector->size - 1); + vector->head += vector->element_size; + + return vector->data + offset; +} + +void * +anv_vector_remove(struct anv_vector *vector) +{ + uint32_t offset; + + if (vector->head == vector->tail) + return NULL; + + assert(vector->head - vector->tail <= vector->size); + + offset = vector->tail & (vector->size - 1); + vector->tail += vector->element_size; + + return vector->data + offset; +} diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c new file mode 100644 index 0000000..c5911a3 --- /dev/null +++ b/src/intel/vulkan/anv_wsi.c @@ -0,0 +1,196 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without 
restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_wsi.h" + +VkResult +anv_init_wsi(struct anv_instance *instance) +{ + VkResult result; + + result = anv_x11_init_wsi(instance); + if (result != VK_SUCCESS) + return result; + +#ifdef HAVE_WAYLAND_PLATFORM + result = anv_wl_init_wsi(instance); + if (result != VK_SUCCESS) { + anv_x11_finish_wsi(instance); + return result; + } +#endif + + return VK_SUCCESS; +} + +void +anv_finish_wsi(struct anv_instance *instance) +{ +#ifdef HAVE_WAYLAND_PLATFORM + anv_wl_finish_wsi(instance); +#endif + anv_x11_finish_wsi(instance); +} + +void anv_DestroySurfaceKHR( + VkInstance _instance, + VkSurfaceKHR _surface, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + + anv_free2(&instance->alloc, pAllocator, surface); +} + +VkResult anv_GetPhysicalDeviceSurfaceSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + VkSurfaceKHR _surface, + VkBool32* pSupported) +{ + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; + + return iface->get_support(surface, device, queueFamilyIndex, pSupported); +} + +VkResult anv_GetPhysicalDeviceSurfaceCapabilitiesKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR _surface, + VkSurfaceCapabilitiesKHR* pSurfaceCapabilities) +{ + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; + + return iface->get_capabilities(surface, device, pSurfaceCapabilities); +} + +VkResult anv_GetPhysicalDeviceSurfaceFormatsKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR _surface, + uint32_t* pSurfaceFormatCount, + VkSurfaceFormatKHR* pSurfaceFormats) +{ + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; + + return iface->get_formats(surface, device, pSurfaceFormatCount, + pSurfaceFormats); +} + +VkResult anv_GetPhysicalDeviceSurfacePresentModesKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR _surface, + uint32_t* pPresentModeCount, + VkPresentModeKHR* pPresentModes) +{ + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; + + return 
iface->get_present_modes(surface, device, pPresentModeCount, + pPresentModes); +} + +VkResult anv_CreateSwapchainKHR( + VkDevice _device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSwapchainKHR* pSwapchain) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, pCreateInfo->surface); + struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; + struct anv_swapchain *swapchain; + + VkResult result = iface->create_swapchain(surface, device, pCreateInfo, + pAllocator, &swapchain); + if (result != VK_SUCCESS) + return result; + + *pSwapchain = anv_swapchain_to_handle(swapchain); + + return VK_SUCCESS; +} + +void anv_DestroySwapchainKHR( + VkDevice device, + VkSwapchainKHR _swapchain, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); + + swapchain->destroy(swapchain, pAllocator); +} + +VkResult anv_GetSwapchainImagesKHR( + VkDevice device, + VkSwapchainKHR _swapchain, + uint32_t* pSwapchainImageCount, + VkImage* pSwapchainImages) +{ + ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); + + return swapchain->get_images(swapchain, pSwapchainImageCount, + pSwapchainImages); +} + +VkResult anv_AcquireNextImageKHR( + VkDevice device, + VkSwapchainKHR _swapchain, + uint64_t timeout, + VkSemaphore semaphore, + VkFence fence, + uint32_t* pImageIndex) +{ + ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); + + return swapchain->acquire_next_image(swapchain, timeout, semaphore, + pImageIndex); +} + +VkResult anv_QueuePresentKHR( + VkQueue _queue, + const VkPresentInfoKHR* pPresentInfo) +{ + ANV_FROM_HANDLE(anv_queue, queue, _queue); + VkResult result; + + for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { + ANV_FROM_HANDLE(anv_swapchain, swapchain, pPresentInfo->pSwapchains[i]); + + assert(swapchain->device == queue->device); + + result = swapchain->queue_present(swapchain, queue, + pPresentInfo->pImageIndices[i]); + /* TODO: What if one of them returns OUT_OF_DATE? */ + if (result != VK_SUCCESS) + return result; + } + + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/anv_wsi.h b/src/intel/vulkan/anv_wsi.h new file mode 100644 index 0000000..6e9ff9b --- /dev/null +++ b/src/intel/vulkan/anv_wsi.h @@ -0,0 +1,74 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#include "anv_private.h" + +struct anv_swapchain; + +struct anv_wsi_interface { + VkResult (*get_support)(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t queueFamilyIndex, + VkBool32* pSupported); + VkResult (*get_capabilities)(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + VkSurfaceCapabilitiesKHR* pSurfaceCapabilities); + VkResult (*get_formats)(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t* pSurfaceFormatCount, + VkSurfaceFormatKHR* pSurfaceFormats); + VkResult (*get_present_modes)(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t* pPresentModeCount, + VkPresentModeKHR* pPresentModes); + VkResult (*create_swapchain)(VkIcdSurfaceBase *surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain); +}; + +struct anv_swapchain { + struct anv_device *device; + + VkResult (*destroy)(struct anv_swapchain *swapchain, + const VkAllocationCallbacks *pAllocator); + VkResult (*get_images)(struct anv_swapchain *swapchain, + uint32_t *pCount, VkImage *pSwapchainImages); + VkResult (*acquire_next_image)(struct anv_swapchain *swap_chain, + uint64_t timeout, VkSemaphore semaphore, + uint32_t *image_index); + VkResult (*queue_present)(struct anv_swapchain *swap_chain, + struct anv_queue *queue, + uint32_t image_index); +}; + +ANV_DEFINE_NONDISP_HANDLE_CASTS(_VkIcdSurfaceBase, VkSurfaceKHR) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_swapchain, VkSwapchainKHR) + +VkResult anv_x11_init_wsi(struct anv_instance *instance); +void anv_x11_finish_wsi(struct anv_instance *instance); +VkResult anv_wl_init_wsi(struct anv_instance *instance); +void anv_wl_finish_wsi(struct anv_instance *instance); diff --git a/src/intel/vulkan/anv_wsi_wayland.c b/src/intel/vulkan/anv_wsi_wayland.c new file mode 100644 index 0000000..6f25eaf --- /dev/null +++ b/src/intel/vulkan/anv_wsi_wayland.c @@ -0,0 +1,871 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include + +#include "anv_wsi.h" + +#include + +#define MIN_NUM_IMAGES 2 + +struct wsi_wl_display { + struct wl_display * display; + struct wl_drm * drm; + + /* Vector of VkFormats supported */ + struct anv_vector formats; + + uint32_t capabilities; +}; + +struct wsi_wayland { + struct anv_wsi_interface base; + + struct anv_instance * instance; + + pthread_mutex_t mutex; + /* Hash table of wl_display -> wsi_wl_display mappings */ + struct hash_table * displays; +}; + +static void +wsi_wl_display_add_vk_format(struct wsi_wl_display *display, VkFormat format) +{ + /* Don't add a format that's already in the list */ + VkFormat *f; + anv_vector_foreach(f, &display->formats) + if (*f == format) + return; + + /* Don't add formats which aren't supported by the driver */ + if (anv_format_for_vk_format(format)->isl_format == + ISL_FORMAT_UNSUPPORTED) { + return; + } + + f = anv_vector_add(&display->formats); + if (f) + *f = format; +} + +static void +drm_handle_device(void *data, struct wl_drm *drm, const char *name) +{ + fprintf(stderr, "wl_drm.device(%s)\n", name); +} + +static uint32_t +wl_drm_format_for_vk_format(VkFormat vk_format, bool alpha) +{ + switch (vk_format) { + /* TODO: Figure out what all the formats mean and make this table + * correct. + */ +#if 0 + case VK_FORMAT_R4G4B4A4_UNORM: + return alpha ? WL_DRM_FORMAT_ABGR4444 : WL_DRM_FORMAT_XBGR4444; + case VK_FORMAT_R5G6B5_UNORM: + return WL_DRM_FORMAT_BGR565; + case VK_FORMAT_R5G5B5A1_UNORM: + return alpha ? WL_DRM_FORMAT_ABGR1555 : WL_DRM_FORMAT_XBGR1555; + case VK_FORMAT_R8G8B8_UNORM: + return WL_DRM_FORMAT_XBGR8888; + case VK_FORMAT_R8G8B8A8_UNORM: + return alpha ? WL_DRM_FORMAT_ABGR8888 : WL_DRM_FORMAT_XBGR8888; + case VK_FORMAT_R10G10B10A2_UNORM: + return alpha ? WL_DRM_FORMAT_ABGR2101010 : WL_DRM_FORMAT_XBGR2101010; + case VK_FORMAT_B4G4R4A4_UNORM: + return alpha ? WL_DRM_FORMAT_ARGB4444 : WL_DRM_FORMAT_XRGB4444; + case VK_FORMAT_B5G6R5_UNORM: + return WL_DRM_FORMAT_RGB565; + case VK_FORMAT_B5G5R5A1_UNORM: + return alpha ? WL_DRM_FORMAT_XRGB1555 : WL_DRM_FORMAT_XRGB1555; +#endif + case VK_FORMAT_B8G8R8_SRGB: + return WL_DRM_FORMAT_BGRX8888; + case VK_FORMAT_B8G8R8A8_SRGB: + return alpha ? WL_DRM_FORMAT_ARGB8888 : WL_DRM_FORMAT_XRGB8888; +#if 0 + case VK_FORMAT_B10G10R10A2_UNORM: + return alpha ? 
WL_DRM_FORMAT_ARGB2101010 : WL_DRM_FORMAT_XRGB2101010; +#endif + + default: + assert("!Unsupported Vulkan format"); + return 0; + } +} + +static void +drm_handle_format(void *data, struct wl_drm *drm, uint32_t wl_format) +{ + struct wsi_wl_display *display = data; + + switch (wl_format) { +#if 0 + case WL_DRM_FORMAT_ABGR4444: + case WL_DRM_FORMAT_XBGR4444: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R4G4B4A4_UNORM); + break; + case WL_DRM_FORMAT_BGR565: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R5G6B5_UNORM); + break; + case WL_DRM_FORMAT_ABGR1555: + case WL_DRM_FORMAT_XBGR1555: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R5G5B5A1_UNORM); + break; + case WL_DRM_FORMAT_XBGR8888: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R8G8B8_UNORM); + /* fallthrough */ + case WL_DRM_FORMAT_ABGR8888: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R8G8B8A8_UNORM); + break; + case WL_DRM_FORMAT_ABGR2101010: + case WL_DRM_FORMAT_XBGR2101010: + wsi_wl_display_add_vk_format(display, VK_FORMAT_R10G10B10A2_UNORM); + break; + case WL_DRM_FORMAT_ARGB4444: + case WL_DRM_FORMAT_XRGB4444: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B4G4R4A4_UNORM); + break; + case WL_DRM_FORMAT_RGB565: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B5G6R5_UNORM); + break; + case WL_DRM_FORMAT_ARGB1555: + case WL_DRM_FORMAT_XRGB1555: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B5G5R5A1_UNORM); + break; +#endif + case WL_DRM_FORMAT_XRGB8888: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8_SRGB); + /* fallthrough */ + case WL_DRM_FORMAT_ARGB8888: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8A8_SRGB); + break; +#if 0 + case WL_DRM_FORMAT_ARGB2101010: + case WL_DRM_FORMAT_XRGB2101010: + wsi_wl_display_add_vk_format(display, VK_FORMAT_B10G10R10A2_UNORM); + break; +#endif + } +} + +static void +drm_handle_authenticated(void *data, struct wl_drm *drm) +{ +} + +static void +drm_handle_capabilities(void *data, struct wl_drm *drm, uint32_t capabilities) +{ + struct wsi_wl_display *display = data; + + display->capabilities = capabilities; +} + +static const struct wl_drm_listener drm_listener = { + drm_handle_device, + drm_handle_format, + drm_handle_authenticated, + drm_handle_capabilities, +}; + +static void +registry_handle_global(void *data, struct wl_registry *registry, + uint32_t name, const char *interface, uint32_t version) +{ + struct wsi_wl_display *display = data; + + if (strcmp(interface, "wl_drm") == 0) { + assert(display->drm == NULL); + + assert(version >= 2); + display->drm = wl_registry_bind(registry, name, &wl_drm_interface, 2); + + if (display->drm) + wl_drm_add_listener(display->drm, &drm_listener, display); + } +} + +static void +registry_handle_global_remove(void *data, struct wl_registry *registry, + uint32_t name) +{ /* No-op */ } + +static const struct wl_registry_listener registry_listener = { + registry_handle_global, + registry_handle_global_remove +}; + +static void +wsi_wl_display_destroy(struct wsi_wayland *wsi, struct wsi_wl_display *display) +{ + anv_vector_finish(&display->formats); + if (display->drm) + wl_drm_destroy(display->drm); + anv_free(&wsi->instance->alloc, display); +} + +static struct wsi_wl_display * +wsi_wl_display_create(struct wsi_wayland *wsi, struct wl_display *wl_display) +{ + struct wsi_wl_display *display = + anv_alloc(&wsi->instance->alloc, sizeof(*display), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!display) + return NULL; + + memset(display, 0, sizeof(*display)); + + display->display = wl_display; + + if 
(!anv_vector_init(&display->formats, sizeof(VkFormat), 8)) + goto fail; + + struct wl_registry *registry = wl_display_get_registry(wl_display); + if (!registry) + return NULL; + + wl_registry_add_listener(registry, ®istry_listener, display); + + /* Round-rip to get the wl_drm global */ + wl_display_roundtrip(wl_display); + + if (!display->drm) + goto fail; + + /* Round-rip to get wl_drm formats and capabilities */ + wl_display_roundtrip(wl_display); + + /* We need prime support */ + if (!(display->capabilities & WL_DRM_CAPABILITY_PRIME)) + goto fail; + + /* We don't need this anymore */ + wl_registry_destroy(registry); + + return display; + +fail: + if (registry) + wl_registry_destroy(registry); + + wsi_wl_display_destroy(wsi, display); + return NULL; +} + +static struct wsi_wl_display * +wsi_wl_get_display(struct anv_instance *instance, struct wl_display *wl_display) +{ + struct wsi_wayland *wsi = + (struct wsi_wayland *)instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; + + pthread_mutex_lock(&wsi->mutex); + + struct hash_entry *entry = _mesa_hash_table_search(wsi->displays, + wl_display); + if (!entry) { + /* We're about to make a bunch of blocking calls. Let's drop the + * mutex for now so we don't block up too badly. + */ + pthread_mutex_unlock(&wsi->mutex); + + struct wsi_wl_display *display = wsi_wl_display_create(wsi, wl_display); + + pthread_mutex_lock(&wsi->mutex); + + entry = _mesa_hash_table_search(wsi->displays, wl_display); + if (entry) { + /* Oops, someone raced us to it */ + wsi_wl_display_destroy(wsi, display); + } else { + entry = _mesa_hash_table_insert(wsi->displays, wl_display, display); + } + } + + pthread_mutex_unlock(&wsi->mutex); + + return entry->data; +} + +VkBool32 anv_GetPhysicalDeviceWaylandPresentationSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + struct wl_display* display) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + + return wsi_wl_get_display(physical_device->instance, display) != NULL; +} + +static VkResult +wsi_wl_surface_get_support(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t queueFamilyIndex, + VkBool32* pSupported) +{ + *pSupported = true; + + return VK_SUCCESS; +} + +static const VkPresentModeKHR present_modes[] = { + VK_PRESENT_MODE_MAILBOX_KHR, + VK_PRESENT_MODE_FIFO_KHR, +}; + +static VkResult +wsi_wl_surface_get_capabilities(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + VkSurfaceCapabilitiesKHR* caps) +{ + caps->minImageCount = MIN_NUM_IMAGES; + caps->maxImageCount = 4; + caps->currentExtent = (VkExtent2D) { -1, -1 }; + caps->minImageExtent = (VkExtent2D) { 1, 1 }; + caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; + caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + caps->maxImageArrayLayers = 1; + + caps->supportedCompositeAlpha = + VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR | + VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR; + + caps->supportedUsageFlags = + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + return VK_SUCCESS; +} + +static VkResult +wsi_wl_surface_get_formats(VkIcdSurfaceBase *icd_surface, + struct anv_physical_device *device, + uint32_t* pSurfaceFormatCount, + VkSurfaceFormatKHR* pSurfaceFormats) +{ + VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface; + struct wsi_wl_display *display = + wsi_wl_get_display(device->instance, 
surface->display); + + uint32_t count = anv_vector_length(&display->formats); + + if (pSurfaceFormats == NULL) { + *pSurfaceFormatCount = count; + return VK_SUCCESS; + } + + assert(*pSurfaceFormatCount >= count); + *pSurfaceFormatCount = count; + + VkFormat *f; + anv_vector_foreach(f, &display->formats) { + *(pSurfaceFormats++) = (VkSurfaceFormatKHR) { + .format = *f, + /* TODO: We should get this from the compositor somehow */ + .colorSpace = VK_COLORSPACE_SRGB_NONLINEAR_KHR, + }; + } + + return VK_SUCCESS; +} + +static VkResult +wsi_wl_surface_get_present_modes(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t* pPresentModeCount, + VkPresentModeKHR* pPresentModes) +{ + if (pPresentModes == NULL) { + *pPresentModeCount = ARRAY_SIZE(present_modes); + return VK_SUCCESS; + } + + assert(*pPresentModeCount >= ARRAY_SIZE(present_modes)); + typed_memcpy(pPresentModes, present_modes, *pPresentModeCount); + *pPresentModeCount = ARRAY_SIZE(present_modes); + + return VK_SUCCESS; +} + +static VkResult +wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain); + +VkResult anv_CreateWaylandSurfaceKHR( + VkInstance _instance, + const VkWaylandSurfaceCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR); + + VkIcdSurfaceWayland *surface; + + surface = anv_alloc2(&instance->alloc, pAllocator, sizeof *surface, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (surface == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + surface->base.platform = VK_ICD_WSI_PLATFORM_WAYLAND; + surface->display = pCreateInfo->display; + surface->surface = pCreateInfo->surface; + + *pSurface = _VkIcdSurfaceBase_to_handle(&surface->base); + + return VK_SUCCESS; +} + +struct wsi_wl_image { + struct anv_image * image; + struct anv_device_memory * memory; + struct wl_buffer * buffer; + bool busy; +}; + +struct wsi_wl_swapchain { + struct anv_swapchain base; + + struct wsi_wl_display * display; + struct wl_event_queue * queue; + struct wl_surface * surface; + + VkExtent2D extent; + VkFormat vk_format; + uint32_t drm_format; + + VkPresentModeKHR present_mode; + bool fifo_ready; + + uint32_t image_count; + struct wsi_wl_image images[0]; +}; + +static VkResult +wsi_wl_swapchain_get_images(struct anv_swapchain *anv_chain, + uint32_t *pCount, VkImage *pSwapchainImages) +{ + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; + + if (pSwapchainImages == NULL) { + *pCount = chain->image_count; + return VK_SUCCESS; + } + + assert(chain->image_count <= *pCount); + for (uint32_t i = 0; i < chain->image_count; i++) + pSwapchainImages[i] = anv_image_to_handle(chain->images[i].image); + + *pCount = chain->image_count; + + return VK_SUCCESS; +} + +static VkResult +wsi_wl_swapchain_acquire_next_image(struct anv_swapchain *anv_chain, + uint64_t timeout, + VkSemaphore semaphore, + uint32_t *image_index) +{ + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; + + int ret = wl_display_dispatch_queue_pending(chain->display->display, + chain->queue); + /* XXX: I'm not sure if out-of-date is the right error here. If + * wl_display_dispatch_queue_pending fails it most likely means we got + * kicked by the server so this seems more-or-less correct. 
+ */ + if (ret < 0) + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + + while (1) { + for (uint32_t i = 0; i < chain->image_count; i++) { + if (!chain->images[i].busy) { + /* We found a non-busy image */ + *image_index = i; + return VK_SUCCESS; + } + } + + /* This time we do a blocking dispatch because we can't go + * anywhere until we get an event. + */ + int ret = wl_display_roundtrip_queue(chain->display->display, + chain->queue); + if (ret < 0) + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + } +} + +static void +frame_handle_done(void *data, struct wl_callback *callback, uint32_t serial) +{ + struct wsi_wl_swapchain *chain = data; + + chain->fifo_ready = true; + + wl_callback_destroy(callback); +} + +static const struct wl_callback_listener frame_listener = { + frame_handle_done, +}; + +static VkResult +wsi_wl_swapchain_queue_present(struct anv_swapchain *anv_chain, + struct anv_queue *queue, + uint32_t image_index) +{ + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; + + if (chain->present_mode == VK_PRESENT_MODE_FIFO_KHR) { + while (!chain->fifo_ready) { + int ret = wl_display_dispatch_queue(chain->display->display, + chain->queue); + if (ret < 0) + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + } + } + + assert(image_index < chain->image_count); + wl_surface_attach(chain->surface, chain->images[image_index].buffer, 0, 0); + wl_surface_damage(chain->surface, 0, 0, INT32_MAX, INT32_MAX); + + if (chain->present_mode == VK_PRESENT_MODE_FIFO_KHR) { + struct wl_callback *frame = wl_surface_frame(chain->surface); + wl_proxy_set_queue((struct wl_proxy *)frame, chain->queue); + wl_callback_add_listener(frame, &frame_listener, chain); + chain->fifo_ready = false; + } + + chain->images[image_index].busy = true; + wl_surface_commit(chain->surface); + wl_display_flush(chain->display->display); + + return VK_SUCCESS; +} + +static void +wsi_wl_image_finish(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image, + const VkAllocationCallbacks* pAllocator) +{ + VkDevice vk_device = anv_device_to_handle(chain->base.device); + anv_FreeMemory(vk_device, anv_device_memory_to_handle(image->memory), + pAllocator); + anv_DestroyImage(vk_device, anv_image_to_handle(image->image), + pAllocator); +} + +static void +buffer_handle_release(void *data, struct wl_buffer *buffer) +{ + struct wsi_wl_image *image = data; + + assert(image->buffer == buffer); + + image->busy = false; +} + +static const struct wl_buffer_listener buffer_listener = { + buffer_handle_release, +}; + +static VkResult +wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image, + const VkAllocationCallbacks* pAllocator) +{ + VkDevice vk_device = anv_device_to_handle(chain->base.device); + VkResult result; + + VkImage vk_image; + result = anv_image_create(vk_device, + &(struct anv_image_create_info) { + .isl_tiling_flags = ISL_TILING_X_BIT, + .stride = 0, + .vk_info = + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = chain->vk_format, + .extent = { + .width = chain->extent.width, + .height = chain->extent.height, + .depth = 1 + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + /* FIXME: Need a way to use X tiling to allow scanout */ + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }}, + pAllocator, + &vk_image); + + if (result != VK_SUCCESS) + return result; + + image->image = anv_image_from_handle(vk_image); + assert(anv_format_is_color(image->image->format)); + + struct 
anv_surface *surface = &image->image->color_surface; + + VkDeviceMemory vk_memory; + result = anv_AllocateMemory(vk_device, + &(VkMemoryAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = image->image->size, + .memoryTypeIndex = 0, + }, + pAllocator, + &vk_memory); + + if (result != VK_SUCCESS) + goto fail_image; + + image->memory = anv_device_memory_from_handle(vk_memory); + image->memory->bo.is_winsys_bo = true; + + result = anv_BindImageMemory(vk_device, vk_image, vk_memory, 0); + + if (result != VK_SUCCESS) + goto fail_mem; + + int ret = anv_gem_set_tiling(chain->base.device, + image->memory->bo.gem_handle, + surface->isl.row_pitch, I915_TILING_X); + if (ret) { + /* FINISHME: Choose a better error. */ + result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail_mem; + } + + int fd = anv_gem_handle_to_fd(chain->base.device, + image->memory->bo.gem_handle); + if (fd == -1) { + /* FINISHME: Choose a better error. */ + result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail_mem; + } + + image->buffer = wl_drm_create_prime_buffer(chain->display->drm, + fd, /* name */ + chain->extent.width, + chain->extent.height, + chain->drm_format, + surface->offset, + surface->isl.row_pitch, + 0, 0, 0, 0 /* unused */); + wl_display_roundtrip(chain->display->display); + close(fd); + + wl_proxy_set_queue((struct wl_proxy *)image->buffer, chain->queue); + wl_buffer_add_listener(image->buffer, &buffer_listener, image); + + return VK_SUCCESS; + +fail_mem: + anv_FreeMemory(vk_device, vk_memory, pAllocator); +fail_image: + anv_DestroyImage(vk_device, vk_image, pAllocator); + + return result; +} + +static VkResult +wsi_wl_swapchain_destroy(struct anv_swapchain *anv_chain, + const VkAllocationCallbacks *pAllocator) +{ + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; + + for (uint32_t i = 0; i < chain->image_count; i++) { + if (chain->images[i].buffer) + wsi_wl_image_finish(chain, &chain->images[i], pAllocator); + } + + anv_free2(&chain->base.device->alloc, pAllocator, chain); + + return VK_SUCCESS; +} + +static VkResult +wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain_out) +{ + VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface; + struct wsi_wl_swapchain *chain; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); + + int num_images = pCreateInfo->minImageCount; + + assert(num_images >= MIN_NUM_IMAGES); + + /* For true mailbox mode, we need at least 4 images: + * 1) One to scan out from + * 2) One to have queued for scan-out + * 3) One to be currently held by the Wayland compositor + * 4) One to render to + */ + if (pCreateInfo->presentMode == VK_PRESENT_MODE_MAILBOX_KHR) + num_images = MAX2(num_images, 4); + + size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); + chain = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (chain == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + chain->base.device = device; + chain->base.destroy = wsi_wl_swapchain_destroy; + chain->base.get_images = wsi_wl_swapchain_get_images; + chain->base.acquire_next_image = wsi_wl_swapchain_acquire_next_image; + chain->base.queue_present = wsi_wl_swapchain_queue_present; + + chain->surface = surface->surface; + chain->extent = pCreateInfo->imageExtent; + chain->vk_format = 
pCreateInfo->imageFormat; + chain->drm_format = wl_drm_format_for_vk_format(chain->vk_format, false); + + chain->present_mode = pCreateInfo->presentMode; + chain->fifo_ready = true; + + chain->image_count = num_images; + + /* Mark a bunch of stuff as NULL. This way we can just call + * destroy_swapchain for cleanup. + */ + for (uint32_t i = 0; i < chain->image_count; i++) + chain->images[i].buffer = NULL; + chain->queue = NULL; + + chain->display = wsi_wl_get_display(device->instance, surface->display); + if (!chain->display) + goto fail; + + chain->queue = wl_display_create_queue(chain->display->display); + if (!chain->queue) + goto fail; + + for (uint32_t i = 0; i < chain->image_count; i++) { + result = wsi_wl_image_init(chain, &chain->images[i], pAllocator); + if (result != VK_SUCCESS) + goto fail; + chain->images[i].busy = false; + } + + *swapchain_out = &chain->base; + + return VK_SUCCESS; + +fail: + wsi_wl_swapchain_destroy(&chain->base, pAllocator); + + return result; +} + +VkResult +anv_wl_init_wsi(struct anv_instance *instance) +{ + struct wsi_wayland *wsi; + VkResult result; + + wsi = anv_alloc(&instance->alloc, sizeof(*wsi), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!wsi) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail; + } + + wsi->instance = instance; + + int ret = pthread_mutex_init(&wsi->mutex, NULL); + if (ret != 0) { + if (ret == ENOMEM) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } else { + /* FINISHME: Choose a better error. */ + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + goto fail_alloc; + } + + wsi->displays = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + if (!wsi->displays) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_mutex; + } + + wsi->base.get_support = wsi_wl_surface_get_support; + wsi->base.get_capabilities = wsi_wl_surface_get_capabilities; + wsi->base.get_formats = wsi_wl_surface_get_formats; + wsi->base.get_present_modes = wsi_wl_surface_get_present_modes; + wsi->base.create_swapchain = wsi_wl_surface_create_swapchain; + + instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND] = &wsi->base; + + return VK_SUCCESS; + +fail_mutex: + pthread_mutex_destroy(&wsi->mutex); + +fail_alloc: + anv_free(&instance->alloc, wsi); +fail: + instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND] = NULL; + + return result; +} + +void +anv_wl_finish_wsi(struct anv_instance *instance) +{ + struct wsi_wayland *wsi = + (struct wsi_wayland *)instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; + + if (wsi) { + _mesa_hash_table_destroy(wsi->displays, NULL); + + pthread_mutex_destroy(&wsi->mutex); + + anv_free(&instance->alloc, wsi); + } +} diff --git a/src/intel/vulkan/anv_wsi_x11.c b/src/intel/vulkan/anv_wsi_x11.c new file mode 100644 index 0000000..843a6b6 --- /dev/null +++ b/src/intel/vulkan/anv_wsi_x11.c @@ -0,0 +1,758 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include + +#include "anv_wsi.h" + +#include "util/hash_table.h" + +struct wsi_x11_connection { + bool has_dri3; + bool has_present; +}; + +struct wsi_x11 { + struct anv_wsi_interface base; + + pthread_mutex_t mutex; + /* Hash table of xcb_connection -> wsi_x11_connection mappings */ + struct hash_table *connections; +}; + +static struct wsi_x11_connection * +wsi_x11_connection_create(struct anv_instance *instance, xcb_connection_t *conn) +{ + xcb_query_extension_cookie_t dri3_cookie, pres_cookie; + xcb_query_extension_reply_t *dri3_reply, *pres_reply; + + struct wsi_x11_connection *wsi_conn = + anv_alloc(&instance->alloc, sizeof(*wsi_conn), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!wsi_conn) + return NULL; + + dri3_cookie = xcb_query_extension(conn, 4, "DRI3"); + pres_cookie = xcb_query_extension(conn, 7, "PRESENT"); + + dri3_reply = xcb_query_extension_reply(conn, dri3_cookie, NULL); + pres_reply = xcb_query_extension_reply(conn, pres_cookie, NULL); + if (dri3_reply == NULL || pres_reply == NULL) { + free(dri3_reply); + free(pres_reply); + anv_free(&instance->alloc, wsi_conn); + return NULL; + } + + wsi_conn->has_dri3 = dri3_reply->present != 0; + wsi_conn->has_present = pres_reply->present != 0; + + free(dri3_reply); + free(pres_reply); + + return wsi_conn; +} + +static void +wsi_x11_connection_destroy(struct anv_instance *instance, + struct wsi_x11_connection *conn) +{ + anv_free(&instance->alloc, conn); +} + +static struct wsi_x11_connection * +wsi_x11_get_connection(struct anv_instance *instance, xcb_connection_t *conn) +{ + struct wsi_x11 *wsi = + (struct wsi_x11 *)instance->wsi[VK_ICD_WSI_PLATFORM_XCB]; + + pthread_mutex_lock(&wsi->mutex); + + struct hash_entry *entry = _mesa_hash_table_search(wsi->connections, conn); + if (!entry) { + /* We're about to make a bunch of blocking calls. Let's drop the + * mutex for now so we don't block up too badly. 
+ */ + pthread_mutex_unlock(&wsi->mutex); + + struct wsi_x11_connection *wsi_conn = + wsi_x11_connection_create(instance, conn); + + pthread_mutex_lock(&wsi->mutex); + + entry = _mesa_hash_table_search(wsi->connections, conn); + if (entry) { + /* Oops, someone raced us to it */ + wsi_x11_connection_destroy(instance, wsi_conn); + } else { + entry = _mesa_hash_table_insert(wsi->connections, conn, wsi_conn); + } + } + + pthread_mutex_unlock(&wsi->mutex); + + return entry->data; +} + +static const VkSurfaceFormatKHR formats[] = { + { .format = VK_FORMAT_B8G8R8A8_SRGB, }, +}; + +static const VkPresentModeKHR present_modes[] = { + VK_PRESENT_MODE_MAILBOX_KHR, +}; + +static xcb_screen_t * +get_screen_for_root(xcb_connection_t *conn, xcb_window_t root) +{ + xcb_screen_iterator_t screen_iter = + xcb_setup_roots_iterator(xcb_get_setup(conn)); + + for (; screen_iter.rem; xcb_screen_next (&screen_iter)) { + if (screen_iter.data->root == root) + return screen_iter.data; + } + + return NULL; +} + +static xcb_visualtype_t * +screen_get_visualtype(xcb_screen_t *screen, xcb_visualid_t visual_id, + unsigned *depth) +{ + xcb_depth_iterator_t depth_iter = + xcb_screen_allowed_depths_iterator(screen); + + for (; depth_iter.rem; xcb_depth_next (&depth_iter)) { + xcb_visualtype_iterator_t visual_iter = + xcb_depth_visuals_iterator (depth_iter.data); + + for (; visual_iter.rem; xcb_visualtype_next (&visual_iter)) { + if (visual_iter.data->visual_id == visual_id) { + if (depth) + *depth = depth_iter.data->depth; + return visual_iter.data; + } + } + } + + return NULL; +} + +static xcb_visualtype_t * +connection_get_visualtype(xcb_connection_t *conn, xcb_visualid_t visual_id, + unsigned *depth) +{ + xcb_screen_iterator_t screen_iter = + xcb_setup_roots_iterator(xcb_get_setup(conn)); + + /* For this we have to iterate over all of the screens which is rather + * annoying. Fortunately, there is probably only 1. + */ + for (; screen_iter.rem; xcb_screen_next (&screen_iter)) { + xcb_visualtype_t *visual = screen_get_visualtype(screen_iter.data, + visual_id, depth); + if (visual) + return visual; + } + + return NULL; +} + +static xcb_visualtype_t * +get_visualtype_for_window(xcb_connection_t *conn, xcb_window_t window, + unsigned *depth) +{ + xcb_query_tree_cookie_t tree_cookie; + xcb_get_window_attributes_cookie_t attrib_cookie; + xcb_query_tree_reply_t *tree; + xcb_get_window_attributes_reply_t *attrib; + + tree_cookie = xcb_query_tree(conn, window); + attrib_cookie = xcb_get_window_attributes(conn, window); + + tree = xcb_query_tree_reply(conn, tree_cookie, NULL); + attrib = xcb_get_window_attributes_reply(conn, attrib_cookie, NULL); + if (attrib == NULL || tree == NULL) { + free(attrib); + free(tree); + return NULL; + } + + xcb_window_t root = tree->root; + xcb_visualid_t visual_id = attrib->visual; + free(attrib); + free(tree); + + xcb_screen_t *screen = get_screen_for_root(conn, root); + if (screen == NULL) + return NULL; + + return screen_get_visualtype(screen, visual_id, depth); +} + +static bool +visual_has_alpha(xcb_visualtype_t *visual, unsigned depth) +{ + uint32_t rgb_mask = visual->red_mask | + visual->green_mask | + visual->blue_mask; + + uint32_t all_mask = 0xffffffff >> (32 - depth); + + /* Do we have bits left over after RGB? 
*/ + return (all_mask & ~rgb_mask) != 0; +} + +VkBool32 anv_GetPhysicalDeviceXcbPresentationSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + xcb_connection_t* connection, + xcb_visualid_t visual_id) +{ + ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); + + struct wsi_x11_connection *wsi_conn = + wsi_x11_get_connection(device->instance, connection); + + if (!wsi_conn->has_dri3) { + fprintf(stderr, "vulkan: No DRI3 support\n"); + return false; + } + + unsigned visual_depth; + if (!connection_get_visualtype(connection, visual_id, &visual_depth)) + return false; + + if (visual_depth != 24 && visual_depth != 32) + return false; + + return true; +} + +static VkResult +x11_surface_get_support(VkIcdSurfaceBase *icd_surface, + struct anv_physical_device *device, + uint32_t queueFamilyIndex, + VkBool32* pSupported) +{ + VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface; + + struct wsi_x11_connection *wsi_conn = + wsi_x11_get_connection(device->instance, surface->connection); + if (!wsi_conn) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + if (!wsi_conn->has_dri3) { + fprintf(stderr, "vulkan: No DRI3 support\n"); + *pSupported = false; + return VK_SUCCESS; + } + + unsigned visual_depth; + if (!get_visualtype_for_window(surface->connection, surface->window, + &visual_depth)) { + *pSupported = false; + return VK_SUCCESS; + } + + if (visual_depth != 24 && visual_depth != 32) { + *pSupported = false; + return VK_SUCCESS; + } + + *pSupported = true; + return VK_SUCCESS; +} + +static VkResult +x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface, + struct anv_physical_device *device, + VkSurfaceCapabilitiesKHR *caps) +{ + VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface; + xcb_get_geometry_cookie_t geom_cookie; + xcb_generic_error_t *err; + xcb_get_geometry_reply_t *geom; + unsigned visual_depth; + + geom_cookie = xcb_get_geometry(surface->connection, surface->window); + + /* This does a round-trip. This is why we do get_geometry first and + * wait to read the reply until after we have a visual. + */ + xcb_visualtype_t *visual = + get_visualtype_for_window(surface->connection, surface->window, + &visual_depth); + + geom = xcb_get_geometry_reply(surface->connection, geom_cookie, &err); + if (geom) { + VkExtent2D extent = { geom->width, geom->height }; + caps->currentExtent = extent; + caps->minImageExtent = extent; + caps->maxImageExtent = extent; + } else { + /* This can happen if the client didn't wait for the configure event + * to come back from the compositor. In that case, we don't know the + * size of the window so we just return valid "I don't know" stuff. 
+ */ + caps->currentExtent = (VkExtent2D) { -1, -1 }; + caps->minImageExtent = (VkExtent2D) { 1, 1 }; + caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; + } + free(err); + free(geom); + + if (visual_has_alpha(visual, visual_depth)) { + caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR | + VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR; + } else { + caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR | + VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + } + + caps->minImageCount = 2; + caps->maxImageCount = 4; + caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + caps->maxImageArrayLayers = 1; + caps->supportedUsageFlags = + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + return VK_SUCCESS; +} + +static VkResult +x11_surface_get_formats(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t *pSurfaceFormatCount, + VkSurfaceFormatKHR *pSurfaceFormats) +{ + if (pSurfaceFormats == NULL) { + *pSurfaceFormatCount = ARRAY_SIZE(formats); + return VK_SUCCESS; + } + + assert(*pSurfaceFormatCount >= ARRAY_SIZE(formats)); + typed_memcpy(pSurfaceFormats, formats, *pSurfaceFormatCount); + *pSurfaceFormatCount = ARRAY_SIZE(formats); + + return VK_SUCCESS; +} + +static VkResult +x11_surface_get_present_modes(VkIcdSurfaceBase *surface, + struct anv_physical_device *device, + uint32_t *pPresentModeCount, + VkPresentModeKHR *pPresentModes) +{ + if (pPresentModes == NULL) { + *pPresentModeCount = ARRAY_SIZE(present_modes); + return VK_SUCCESS; + } + + assert(*pPresentModeCount >= ARRAY_SIZE(present_modes)); + typed_memcpy(pPresentModes, present_modes, *pPresentModeCount); + *pPresentModeCount = ARRAY_SIZE(present_modes); + + return VK_SUCCESS; +} + +static VkResult +x11_surface_create_swapchain(VkIcdSurfaceBase *surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain); + +VkResult anv_CreateXcbSurfaceKHR( + VkInstance _instance, + const VkXcbSurfaceCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSurfaceKHR* pSurface) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR); + + VkIcdSurfaceXcb *surface; + + surface = anv_alloc2(&instance->alloc, pAllocator, sizeof *surface, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (surface == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + surface->base.platform = VK_ICD_WSI_PLATFORM_XCB; + surface->connection = pCreateInfo->connection; + surface->window = pCreateInfo->window; + + *pSurface = _VkIcdSurfaceBase_to_handle(&surface->base); + + return VK_SUCCESS; +} + +struct x11_image { + struct anv_image * image; + struct anv_device_memory * memory; + xcb_pixmap_t pixmap; + xcb_get_geometry_cookie_t geom_cookie; + bool busy; +}; + +struct x11_swapchain { + struct anv_swapchain base; + + xcb_connection_t * conn; + xcb_window_t window; + xcb_gc_t gc; + VkExtent2D extent; + uint32_t image_count; + uint32_t next_image; + struct x11_image images[0]; +}; + +static VkResult +x11_get_images(struct anv_swapchain *anv_chain, + uint32_t* pCount, VkImage *pSwapchainImages) +{ + struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; + + if (pSwapchainImages == NULL) { + *pCount = chain->image_count; + return VK_SUCCESS; + } + + 
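+   /* The caller supplied an array, so it must have room for every image in the swapchain. */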
assert(chain->image_count <= *pCount); + for (uint32_t i = 0; i < chain->image_count; i++) + pSwapchainImages[i] = anv_image_to_handle(chain->images[i].image); + + *pCount = chain->image_count; + + return VK_SUCCESS; +} + +static VkResult +x11_acquire_next_image(struct anv_swapchain *anv_chain, + uint64_t timeout, + VkSemaphore semaphore, + uint32_t *image_index) +{ + struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; + struct x11_image *image = &chain->images[chain->next_image]; + + if (image->busy) { + xcb_generic_error_t *err; + xcb_get_geometry_reply_t *geom = + xcb_get_geometry_reply(chain->conn, image->geom_cookie, &err); + if (!geom) { + free(err); + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + } + + if (geom->width != chain->extent.width || + geom->height != chain->extent.height) { + free(geom); + return vk_error(VK_ERROR_OUT_OF_DATE_KHR); + } + free(geom); + + image->busy = false; + } + + *image_index = chain->next_image; + chain->next_image = (chain->next_image + 1) % chain->image_count; + return VK_SUCCESS; +} + +static VkResult +x11_queue_present(struct anv_swapchain *anv_chain, + struct anv_queue *queue, + uint32_t image_index) +{ + struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; + struct x11_image *image = &chain->images[image_index]; + + assert(image_index < chain->image_count); + + xcb_void_cookie_t cookie; + + cookie = xcb_copy_area(chain->conn, + image->pixmap, + chain->window, + chain->gc, + 0, 0, + 0, 0, + chain->extent.width, + chain->extent.height); + xcb_discard_reply(chain->conn, cookie.sequence); + + image->geom_cookie = xcb_get_geometry(chain->conn, chain->window); + image->busy = true; + + xcb_flush(chain->conn); + + return VK_SUCCESS; +} + +static VkResult +x11_swapchain_destroy(struct anv_swapchain *anv_chain, + const VkAllocationCallbacks *pAllocator) +{ + struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; + xcb_void_cookie_t cookie; + + for (uint32_t i = 0; i < chain->image_count; i++) { + struct x11_image *image = &chain->images[i]; + + if (image->busy) + xcb_discard_reply(chain->conn, image->geom_cookie.sequence); + + cookie = xcb_free_pixmap(chain->conn, image->pixmap); + xcb_discard_reply(chain->conn, cookie.sequence); + + /* TODO: Delete images and free memory */ + } + + anv_free2(&chain->base.device->alloc, pAllocator, chain); + + return VK_SUCCESS; +} + +static VkResult +x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, + struct anv_device *device, + const VkSwapchainCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks* pAllocator, + struct anv_swapchain **swapchain_out) +{ + VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface; + struct x11_swapchain *chain; + xcb_void_cookie_t cookie; + VkResult result; + + int num_images = pCreateInfo->minImageCount; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); + + size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); + chain = anv_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (chain == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + chain->base.device = device; + chain->base.destroy = x11_swapchain_destroy; + chain->base.get_images = x11_get_images; + chain->base.acquire_next_image = x11_acquire_next_image; + chain->base.queue_present = x11_queue_present; + + chain->conn = surface->connection; + chain->window = surface->window; + chain->extent = pCreateInfo->imageExtent; + chain->image_count = num_images; + chain->next_image = 0; + + for (uint32_t i = 
0; i < chain->image_count; i++) { + VkDeviceMemory memory_h; + VkImage image_h; + struct anv_image *image; + struct anv_surface *surface; + struct anv_device_memory *memory; + + anv_image_create(anv_device_to_handle(device), + &(struct anv_image_create_info) { + .isl_tiling_flags = ISL_TILING_X_BIT, + .stride = 0, + .vk_info = + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = pCreateInfo->imageFormat, + .extent = { + .width = pCreateInfo->imageExtent.width, + .height = pCreateInfo->imageExtent.height, + .depth = 1 + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = 1, + /* FIXME: Need a way to use X tiling to allow scanout */ + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }}, + NULL, + &image_h); + + image = anv_image_from_handle(image_h); + assert(anv_format_is_color(image->format)); + + surface = &image->color_surface; + + anv_AllocateMemory(anv_device_to_handle(device), + &(VkMemoryAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = image->size, + .memoryTypeIndex = 0, + }, + NULL /* XXX: pAllocator */, + &memory_h); + + memory = anv_device_memory_from_handle(memory_h); + memory->bo.is_winsys_bo = true; + + anv_BindImageMemory(VK_NULL_HANDLE, anv_image_to_handle(image), + memory_h, 0); + + int ret = anv_gem_set_tiling(device, memory->bo.gem_handle, + surface->isl.row_pitch, I915_TILING_X); + if (ret) { + /* FINISHME: Choose a better error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "set_tiling failed: %m"); + goto fail; + } + + int fd = anv_gem_handle_to_fd(device, memory->bo.gem_handle); + if (fd == -1) { + /* FINISHME: Choose a better error. */ + result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, + "handle_to_fd failed: %m"); + goto fail; + } + + uint32_t bpp = 32; + uint32_t depth = 24; + xcb_pixmap_t pixmap = xcb_generate_id(chain->conn); + + cookie = + xcb_dri3_pixmap_from_buffer_checked(chain->conn, + pixmap, + chain->window, + image->size, + pCreateInfo->imageExtent.width, + pCreateInfo->imageExtent.height, + surface->isl.row_pitch, + depth, bpp, fd); + + chain->images[i].image = image; + chain->images[i].memory = memory; + chain->images[i].pixmap = pixmap; + chain->images[i].busy = false; + + xcb_discard_reply(chain->conn, cookie.sequence); + } + + chain->gc = xcb_generate_id(chain->conn); + if (!chain->gc) { + /* FINISHME: Choose a better error. */ + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail; + } + + cookie = xcb_create_gc(chain->conn, + chain->gc, + chain->window, + XCB_GC_GRAPHICS_EXPOSURES, + (uint32_t []) { 0 }); + xcb_discard_reply(chain->conn, cookie.sequence); + + *swapchain_out = &chain->base; + + return VK_SUCCESS; + + fail: + return result; +} + +VkResult +anv_x11_init_wsi(struct anv_instance *instance) +{ + struct wsi_x11 *wsi; + VkResult result; + + wsi = anv_alloc(&instance->alloc, sizeof(*wsi), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!wsi) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail; + } + + int ret = pthread_mutex_init(&wsi->mutex, NULL); + if (ret != 0) { + if (ret == ENOMEM) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } else { + /* FINISHME: Choose a better error. 
*/ + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + goto fail_alloc; + } + + wsi->connections = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + if (!wsi->connections) { + result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_mutex; + } + + wsi->base.get_support = x11_surface_get_support; + wsi->base.get_capabilities = x11_surface_get_capabilities; + wsi->base.get_formats = x11_surface_get_formats; + wsi->base.get_present_modes = x11_surface_get_present_modes; + wsi->base.create_swapchain = x11_surface_create_swapchain; + + instance->wsi[VK_ICD_WSI_PLATFORM_XCB] = &wsi->base; + + return VK_SUCCESS; + +fail_mutex: + pthread_mutex_destroy(&wsi->mutex); +fail_alloc: + anv_free(&instance->alloc, wsi); +fail: + instance->wsi[VK_ICD_WSI_PLATFORM_XCB] = NULL; + + return result; +} + +void +anv_x11_finish_wsi(struct anv_instance *instance) +{ + struct wsi_x11 *wsi = + (struct wsi_x11 *)instance->wsi[VK_ICD_WSI_PLATFORM_XCB]; + + if (wsi) { + _mesa_hash_table_destroy(wsi->connections, NULL); + + pthread_mutex_destroy(&wsi->mutex); + + anv_free(&instance->alloc, wsi); + } +} diff --git a/src/intel/vulkan/dev_icd.json.in b/src/intel/vulkan/dev_icd.json.in new file mode 100644 index 0000000..8492036 --- /dev/null +++ b/src/intel/vulkan/dev_icd.json.in @@ -0,0 +1,7 @@ +{ + "file_format_version": "1.0.0", + "ICD": { + "library_path": "@build_libdir@/libvulkan_intel.so", + "abi_versions": "1.0.3" + } +} diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c new file mode 100644 index 0000000..23327ec --- /dev/null +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -0,0 +1,589 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#include "genxml/gen7_pack.h" +#include "genxml/gen75_pack.h" + +static uint32_t +cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) +{ + static const uint32_t push_constant_opcodes[] = { + [MESA_SHADER_VERTEX] = 21, + [MESA_SHADER_TESS_CTRL] = 25, /* HS */ + [MESA_SHADER_TESS_EVAL] = 26, /* DS */ + [MESA_SHADER_GEOMETRY] = 22, + [MESA_SHADER_FRAGMENT] = 23, + [MESA_SHADER_COMPUTE] = 0, + }; + + VkShaderStageFlags flushed = 0; + + anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { + if (stage == MESA_SHADER_COMPUTE) + continue; + + struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); + + if (state.offset == 0) + continue; + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_CONSTANT_VS, + ._3DCommandSubOpcode = push_constant_opcodes[stage], + .ConstantBody = { + .PointerToConstantBuffer0 = { .offset = state.offset }, + .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32), + }); + + flushed |= mesa_to_vk_shader_stage(stage); + } + + cmd_buffer->state.push_constants_dirty &= ~flushed; + + return flushed; +} + +GENX_FUNC(GEN7, GEN7) void +genX(cmd_buffer_emit_descriptor_pointers)(struct anv_cmd_buffer *cmd_buffer, + uint32_t stages) +{ + static const uint32_t sampler_state_opcodes[] = { + [MESA_SHADER_VERTEX] = 43, + [MESA_SHADER_TESS_CTRL] = 44, /* HS */ + [MESA_SHADER_TESS_EVAL] = 45, /* DS */ + [MESA_SHADER_GEOMETRY] = 46, + [MESA_SHADER_FRAGMENT] = 47, + [MESA_SHADER_COMPUTE] = 0, + }; + + static const uint32_t binding_table_opcodes[] = { + [MESA_SHADER_VERTEX] = 38, + [MESA_SHADER_TESS_CTRL] = 39, + [MESA_SHADER_TESS_EVAL] = 40, + [MESA_SHADER_GEOMETRY] = 41, + [MESA_SHADER_FRAGMENT] = 42, + [MESA_SHADER_COMPUTE] = 0, + }; + + anv_foreach_stage(s, stages) { + if (cmd_buffer->state.samplers[s].alloc_size > 0) { + anv_batch_emit(&cmd_buffer->batch, + GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS, + ._3DCommandSubOpcode = sampler_state_opcodes[s], + .PointertoVSSamplerState = cmd_buffer->state.samplers[s].offset); + } + + /* Always emit binding table pointers if we're asked to, since on SKL + * this is what flushes push constants. */ + anv_batch_emit(&cmd_buffer->batch, + GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS, + ._3DCommandSubOpcode = binding_table_opcodes[s], + .PointertoVSBindingTable = cmd_buffer->state.binding_tables[s].offset); + } +} + +GENX_FUNC(GEN7, GEN7) uint32_t +genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer) +{ + VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty & + cmd_buffer->state.pipeline->active_stages; + + VkResult result = VK_SUCCESS; + anv_foreach_stage(s, dirty) { + result = anv_cmd_buffer_emit_samplers(cmd_buffer, s, + &cmd_buffer->state.samplers[s]); + if (result != VK_SUCCESS) + break; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s, + &cmd_buffer->state.binding_tables[s]); + if (result != VK_SUCCESS) + break; + } + + if (result != VK_SUCCESS) { + assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY); + + result = anv_cmd_buffer_new_binding_table_block(cmd_buffer); + assert(result == VK_SUCCESS); + + /* Re-emit state base addresses so we get the new surface state base + * address before we start emitting binding tables etc. 
+ */ + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + + /* Re-emit all active binding tables */ + dirty |= cmd_buffer->state.pipeline->active_stages; + anv_foreach_stage(s, dirty) { + result = anv_cmd_buffer_emit_samplers(cmd_buffer, s, + &cmd_buffer->state.samplers[s]); + if (result != VK_SUCCESS) + return result; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s, + &cmd_buffer->state.binding_tables[s]); + if (result != VK_SUCCESS) + return result; + } + } + + cmd_buffer->state.descriptors_dirty &= ~dirty; + + return dirty; +} + +static inline int64_t +clamp_int64(int64_t x, int64_t min, int64_t max) +{ + if (x < min) + return min; + else if (x < max) + return x; + else + return max; +} + +static void +emit_scissor_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t count, const VkRect2D *scissors) +{ + struct anv_state scissor_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); + + for (uint32_t i = 0; i < count; i++) { + const VkRect2D *s = &scissors[i]; + + /* Since xmax and ymax are inclusive, we have to have xmax < xmin or + * ymax < ymin for empty clips. In case clip x, y, width height are all + * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't + * what we want. Just special case empty clips and produce a canonical + * empty clip. */ + static const struct GEN7_SCISSOR_RECT empty_scissor = { + .ScissorRectangleYMin = 1, + .ScissorRectangleXMin = 1, + .ScissorRectangleYMax = 0, + .ScissorRectangleXMax = 0 + }; + + const int max = 0xffff; + struct GEN7_SCISSOR_RECT scissor = { + /* Do this math using int64_t so overflow gets clamped correctly. */ + .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max), + .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max), + .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max), + .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max) + }; + + if (s->extent.width <= 0 || s->extent.height <= 0) { + GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 8, + &empty_scissor); + } else { + GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 8, &scissor); + } + } + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_SCISSOR_STATE_POINTERS, + .ScissorRectPointer = scissor_state.offset); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(scissor_state); +} + +GENX_FUNC(GEN7, GEN7) void +genX(cmd_buffer_emit_scissor)(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->state.dynamic.scissor.count > 0) { + emit_scissor_state(cmd_buffer, cmd_buffer->state.dynamic.scissor.count, + cmd_buffer->state.dynamic.scissor.scissors); + } else { + /* Emit a default scissor based on the currently bound framebuffer */ + emit_scissor_state(cmd_buffer, 1, + &(VkRect2D) { + .offset = { .x = 0, .y = 0, }, + .extent = { + .width = cmd_buffer->state.framebuffer->width, + .height = cmd_buffer->state.framebuffer->height, + }, + }); + } +} + +static const uint32_t vk_to_gen_index_type[] = { + [VK_INDEX_TYPE_UINT16] = INDEX_WORD, + [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, +}; + +static const uint32_t restart_index_for_type[] = { + [VK_INDEX_TYPE_UINT16] = UINT16_MAX, + [VK_INDEX_TYPE_UINT32] = UINT32_MAX, +}; + +void genX(CmdBindIndexBuffer)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; + if (ANV_IS_HASWELL) + 
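+   /* Haswell has a programmable cut index (3DSTATE_VF, emitted below when
+    * the index buffer state is flushed), so record the all-ones restart
+    * value for the bound index type here; e.g. VK_INDEX_TYPE_UINT16 maps to
+    * 0xffff and VK_INDEX_TYPE_UINT32 to 0xffffffff.
+    */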
cmd_buffer->state.restart_index = restart_index_for_type[indexType]; + cmd_buffer->state.gen7.index_buffer = buffer; + cmd_buffer->state.gen7.index_type = vk_to_gen_index_type[indexType]; + cmd_buffer->state.gen7.index_offset = offset; +} + +static VkResult +flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = anv_cmd_buffer_emit_samplers(cmd_buffer, + MESA_SHADER_COMPUTE, &samplers); + if (result != VK_SUCCESS) + return result; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, + MESA_SHADER_COMPUTE, &surfaces); + if (result != VK_SUCCESS) + return result; + + struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); + + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + + unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; + unsigned push_constant_data_size = + (prog_data->nr_params + local_id_dwords) * 4; + unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); + unsigned push_constant_regs = reg_aligned_constant_size / 32; + + if (push_state.alloc_size) { + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), + .CURBETotalDataLength = push_state.alloc_size, + .CURBEDataStartAddress = push_state.offset); + } + + assert(prog_data->total_shared <= 64 * 1024); + uint32_t slm_size = 0; + if (prog_data->total_shared > 0) { + /* slm_size is in 4k increments, but must be a power of 2. */ + slm_size = 4 * 1024; + while (slm_size < prog_data->total_shared) + slm_size <<= 1; + slm_size /= 4 * 1024; + } + + struct anv_state state = + anv_state_pool_emit(&device->dynamic_state_pool, + GEN7_INTERFACE_DESCRIPTOR_DATA, 64, + .KernelStartPointer = pipeline->cs_simd, + .BindingTablePointer = surfaces.offset, + .SamplerStatePointer = samplers.offset, + .ConstantURBEntryReadLength = + push_constant_regs, + .ConstantURBEntryReadOffset = 0, + .BarrierEnable = cs_prog_data->uses_barrier, + .SharedLocalMemorySize = slm_size, + .NumberofThreadsinGPGPUThreadGroup = + pipeline->cs_thread_width_max); + + const uint32_t size = GEN7_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); + anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD, + .InterfaceDescriptorTotalLength = size, + .InterfaceDescriptorDataStartAddress = state.offset); + + return VK_SUCCESS; +} + +void +genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + VkResult result; + + assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + + if (cmd_buffer->state.current_pipeline != GPGPU) { + anv_batch_emit(&cmd_buffer->batch, GEN7_PIPELINE_SELECT, + .PipelineSelection = GPGPU); + cmd_buffer->state.current_pipeline = GPGPU; + } + + if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || + (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { + /* FIXME: figure out descriptors for gen7 */ + result = flush_compute_descriptor_set(cmd_buffer); + assert(result == VK_SUCCESS); + cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; + } + + cmd_buffer->state.compute_dirty = 0; +} + +void 
+genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + uint32_t *p; + + uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; + + assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); + + genX(flush_pipeline_select_3d)(cmd_buffer); + + if (vb_emit) { + const uint32_t num_buffers = __builtin_popcount(vb_emit); + const uint32_t num_dwords = 1 + num_buffers * 4; + + p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, + GEN7_3DSTATE_VERTEX_BUFFERS); + uint32_t vb, i = 0; + for_each_bit(vb, vb_emit) { + struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; + uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; + + struct GEN7_VERTEX_BUFFER_STATE state = { + .VertexBufferIndex = vb, + .BufferAccessType = pipeline->instancing_enable[vb] ? INSTANCEDATA : VERTEXDATA, + .VertexBufferMemoryObjectControlState = GEN7_MOCS, + .AddressModifyEnable = true, + .BufferPitch = pipeline->binding_stride[vb], + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .EndAddress = { buffer->bo, buffer->offset + buffer->size - 1}, + .InstanceDataStepRate = 1 + }; + + GEN7_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state); + i++; + } + } + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { + /* If somebody compiled a pipeline after starting a command buffer the + * scratch bo may have grown since we started this cmd buffer (and + * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, + * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ + if (cmd_buffer->state.scratch_size < pipeline->total_scratch) + gen7_cmd_buffer_emit_state_base_address(cmd_buffer); + + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + } + + if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_VERTEX_BIT || + cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_VERTEX_BIT) { + /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1: + * + * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth + * stall needs to be sent just prior to any 3DSTATE_VS, + * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS, + * 3DSTATE_BINDING_TABLE_POINTER_VS, + * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one + * PIPE_CONTROL needs to be sent before any combination of VS + * associated 3DSTATE." + */ + anv_batch_emit(&cmd_buffer->batch, GEN7_PIPE_CONTROL, + .DepthStallEnable = true, + .PostSyncOperation = WriteImmediateData, + .Address = { &cmd_buffer->device->workaround_bo, 0 }); + } + + uint32_t dirty = 0; + if (cmd_buffer->state.descriptors_dirty) { + dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); + gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); + } + + if (cmd_buffer->state.push_constants_dirty) + cmd_buffer_flush_push_constants(cmd_buffer); + + /* We use the gen8 state here because it only contains the additional + * min/max fields and, since they occur at the end of the packet and + * don't change the stride, they work on gen7 too. 
+ */ + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) + gen8_cmd_buffer_emit_viewport(cmd_buffer); + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) + gen7_cmd_buffer_emit_scissor(cmd_buffer); + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_RENDER_TARGETS | + ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH | + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) { + + bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || + cmd_buffer->state.dynamic.depth_bias.slope != 0.0f; + + const struct anv_image_view *iview = + anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); + const struct anv_image *image = iview ? iview->image : NULL; + const uint32_t depth_format = image ? + isl_surf_get_depth_format(&cmd_buffer->device->isl_dev, + &image->depth_surface.isl) : D16_UNORM; + + uint32_t sf_dw[GEN7_3DSTATE_SF_length]; + struct GEN7_3DSTATE_SF sf = { + GEN7_3DSTATE_SF_header, + .DepthBufferSurfaceFormat = depth_format, + .LineWidth = cmd_buffer->state.dynamic.line_width, + .GlobalDepthOffsetEnableSolid = enable_bias, + .GlobalDepthOffsetEnableWireframe = enable_bias, + .GlobalDepthOffsetEnablePoint = enable_bias, + .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias, + .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope, + .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp + }; + GEN7_3DSTATE_SF_pack(NULL, sf_dw, &sf); + + anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen7.sf); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + GEN7_COLOR_CALC_STATE_length * 4, + 64); + struct GEN7_COLOR_CALC_STATE cc = { + .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], + .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], + .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], + .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], + .StencilReferenceValue = + cmd_buffer->state.dynamic.stencil_reference.front, + .BackFaceStencilReferenceValue = + cmd_buffer->state.dynamic.stencil_reference.back, + }; + GEN7_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(cc_state); + + anv_batch_emit(&cmd_buffer->batch, + GEN7_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = cc_state.offset); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_RENDER_TARGETS | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { + uint32_t depth_stencil_dw[GEN7_DEPTH_STENCIL_STATE_length]; + + const struct anv_image_view *iview = + anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); + + struct GEN7_DEPTH_STENCIL_STATE depth_stencil = { + .StencilBufferWriteEnable = iview && (iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT), + + .StencilTestMask = + cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff, + .StencilWriteMask = + cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff, + + .BackfaceStencilTestMask = + cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = + cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, + }; + GEN7_DEPTH_STENCIL_STATE_pack(NULL, depth_stencil_dw, &depth_stencil); + + struct anv_state ds_state = + anv_cmd_buffer_merge_dynamic(cmd_buffer, depth_stencil_dw, + pipeline->gen7.depth_stencil_state, + 
GEN7_DEPTH_STENCIL_STATE_length, 64); + + anv_batch_emit(&cmd_buffer->batch, + GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS, + .PointertoDEPTH_STENCIL_STATE = ds_state.offset); + } + + if (cmd_buffer->state.gen7.index_buffer && + cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_INDEX_BUFFER)) { + struct anv_buffer *buffer = cmd_buffer->state.gen7.index_buffer; + uint32_t offset = cmd_buffer->state.gen7.index_offset; + + if (ANV_IS_HASWELL) { + anv_batch_emit(&cmd_buffer->batch, GEN75_3DSTATE_VF, + .IndexedDrawCutIndexEnable = pipeline->primitive_restart, + .CutIndex = cmd_buffer->state.restart_index); + } + + anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_INDEX_BUFFER, + .CutIndexEnable = pipeline->primitive_restart, + .IndexFormat = cmd_buffer->state.gen7.index_type, + .MemoryObjectControlState = GEN7_MOCS, + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferEndingAddress = { buffer->bo, buffer->offset + buffer->size }); + } + + cmd_buffer->state.vb_dirty &= ~vb_emit; + cmd_buffer->state.dirty = 0; +} + +void genX(CmdSetEvent)( + VkCommandBuffer commandBuffer, + VkEvent event, + VkPipelineStageFlags stageMask) +{ + stub(); +} + +void genX(CmdResetEvent)( + VkCommandBuffer commandBuffer, + VkEvent event, + VkPipelineStageFlags stageMask) +{ + stub(); +} + +void genX(CmdWaitEvents)( + VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + uint32_t memoryBarrierCount, + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers) +{ + stub(); +} diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c new file mode 100644 index 0000000..7c054fa --- /dev/null +++ b/src/intel/vulkan/gen7_pipeline.c @@ -0,0 +1,410 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#include "genxml/gen7_pack.h" +#include "genxml/gen75_pack.h" + +#include "genX_pipeline_util.h" + +static void +gen7_emit_rs_state(struct anv_pipeline *pipeline, + const VkPipelineRasterizationStateCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra) +{ + struct GEN7_3DSTATE_SF sf = { + GEN7_3DSTATE_SF_header, + + /* LegacyGlobalDepthBiasEnable */ + + .StatisticsEnable = true, + .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode], + .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], + .ViewTransformEnable = !(extra && extra->disable_viewport), + .FrontWinding = vk_to_gen_front_face[info->frontFace], + /* bool AntiAliasingEnable; */ + + .CullMode = vk_to_gen_cullmode[info->cullMode], + + /* uint32_t LineEndCapAntialiasingRegionWidth; */ + .ScissorRectangleEnable = !(extra && extra->disable_scissor), + + /* uint32_t MultisampleRasterizationMode; */ + /* bool LastPixelEnable; */ + + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 0, + + /* uint32_t AALineDistanceMode; */ + /* uint32_t VertexSubPixelPrecisionSelect; */ + .UsePointWidthState = !pipeline->writes_point_size, + .PointWidth = 1.0, + }; + + GEN7_3DSTATE_SF_pack(NULL, &pipeline->gen7.sf, &sf); +} + +static void +gen7_emit_ds_state(struct anv_pipeline *pipeline, + const VkPipelineDepthStencilStateCreateInfo *info) +{ + if (info == NULL) { + /* We're going to OR this together with the dynamic state. We need + * to make sure it's initialized to something useful. + */ + memset(pipeline->gen7.depth_stencil_state, 0, + sizeof(pipeline->gen7.depth_stencil_state)); + return; + } + + struct GEN7_DEPTH_STENCIL_STATE state = { + .DepthTestEnable = info->depthTestEnable, + .DepthBufferWriteEnable = info->depthWriteEnable, + .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], + .DoubleSidedStencilEnable = true, + + .StencilTestEnable = info->stencilTestEnable, + .StencilFailOp = vk_to_gen_stencil_op[info->front.failOp], + .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp], + .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp], + .StencilTestFunction = vk_to_gen_compare_op[info->front.compareOp], + + .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.failOp], + .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.passOp], + .BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[info->back.depthFailOp], + .BackFaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp], + }; + + GEN7_DEPTH_STENCIL_STATE_pack(NULL, &pipeline->gen7.depth_stencil_state, &state); +} + +static void +gen7_emit_cb_state(struct anv_pipeline *pipeline, + const VkPipelineColorBlendStateCreateInfo *info, + const VkPipelineMultisampleStateCreateInfo *ms_info) +{ + struct anv_device *device = pipeline->device; + + if (info == NULL || info->attachmentCount == 0) { + pipeline->blend_state = + anv_state_pool_emit(&device->dynamic_state_pool, + GEN7_BLEND_STATE, 64, + .ColorBufferBlendEnable = false, + .WriteDisableAlpha = true, + .WriteDisableRed = true, + .WriteDisableGreen = true, + .WriteDisableBlue = true); + } else { + /* FIXME-GEN7: All render targets share blend state settings on gen7, we + * can't implement this. 
+ */ + const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0]; + pipeline->blend_state = + anv_state_pool_emit(&device->dynamic_state_pool, + GEN7_BLEND_STATE, 64, + + .ColorBufferBlendEnable = a->blendEnable, + .IndependentAlphaBlendEnable = true, /* FIXME: yes? */ + .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], + + .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], + .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], + + .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], + .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], + .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], + .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, + +# if 0 + bool AlphaToOneEnable; + bool AlphaToCoverageDitherEnable; +# endif + + .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), + .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), + .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), + .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), + + .LogicOpEnable = info->logicOpEnable, + .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], + +# if 0 + bool AlphaTestEnable; + uint32_t AlphaTestFunction; + bool ColorDitherEnable; + uint32_t XDitherOffset; + uint32_t YDitherOffset; + uint32_t ColorClampRange; + bool PreBlendColorClampEnable; + bool PostBlendColorClampEnable; +# endif + ); + } + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_BLEND_STATE_POINTERS, + .BlendStatePointer = pipeline->blend_state.offset); +} + +GENX_FUNC(GEN7, GEN75) VkResult +genX(graphics_pipeline_create)( + VkDevice _device, + struct anv_pipeline_cache * cache, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline *pipeline; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_pipeline_init(pipeline, device, cache, + pCreateInfo, extra, pAllocator); + if (result != VK_SUCCESS) { + anv_free2(&device->alloc, pAllocator, pipeline); + return result; + } + + assert(pCreateInfo->pVertexInputState); + emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra); + + assert(pCreateInfo->pRasterizationState); + gen7_emit_rs_state(pipeline, pCreateInfo->pRasterizationState, extra); + + gen7_emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); + + gen7_emit_cb_state(pipeline, pCreateInfo->pColorBlendState, + pCreateInfo->pMultisampleState); + + emit_urb_setup(pipeline); + + const VkPipelineRasterizationStateCreateInfo *rs_info = + pCreateInfo->pRasterizationState; + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_CLIP, + .FrontWinding = vk_to_gen_front_face[rs_info->frontFace], + .CullMode = vk_to_gen_cullmode[rs_info->cullMode], + .ClipEnable = true, + .APIMode = APIMODE_OGL, + .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), + .ClipMode = CLIPMODE_NORMAL, + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 0, + .MinimumPointWidth = 0.125, + .MaximumPointWidth = 255.875, + .MaximumVPIndex = 
pCreateInfo->pViewportState->viewportCount - 1); + + if (pCreateInfo->pMultisampleState && + pCreateInfo->pMultisampleState->rasterizationSamples > 1) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); + + uint32_t samples = 1; + uint32_t log2_samples = __builtin_ffs(samples) - 1; + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_MULTISAMPLE, + .PixelLocation = PIXLOC_CENTER, + .NumberofMultisamples = log2_samples); + + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SAMPLE_MASK, + .SampleMask = 0xff); + + const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; + /* The last geometry producing stage will set urb_offset and urb_length, + * which we use in 3DSTATE_SBE. Skip the VUE header and position slots. */ + uint32_t urb_offset = 1; + uint32_t urb_length = (vue_prog_data->vue_map.num_slots + 1) / 2 - urb_offset; + +#if 0 + /* From gen7_vs_state.c */ + + /** + * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > + * Geometry > Geometry Shader > State: + * + * "Note: Because of corruption in IVB:GT2, software needs to flush the + * whole fixed function pipeline when the GS enable changes value in + * the 3DSTATE_GS." + * + * The hardware architects have clarified that in this context "flush the + * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS + * Stall" bit set. + */ + if (!brw->is_haswell && !brw->is_baytrail) + gen7_emit_vs_workaround_flush(brw); +#endif + + if (pipeline->vs_vec4 == NO_KERNEL || (extra && extra->disable_vs)) + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), .VSFunctionEnable = false); + else + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), + .KernelStartPointer = pipeline->vs_vec4, + .ScratchSpaceBaseOffset = pipeline->scratch_start[MESA_SHADER_VERTEX], + .PerThreadScratchSpace = scratch_space(&vue_prog_data->base), + + .DispatchGRFStartRegisterforURBData = + vue_prog_data->base.dispatch_grf_start_reg, + .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + .VertexURBEntryReadOffset = 0, + + .MaximumNumberofThreads = device->info.max_vs_threads - 1, + .StatisticsEnable = true, + .VSFunctionEnable = true); + + const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; + + if (pipeline->gs_kernel == NO_KERNEL || (extra && extra->disable_vs)) { + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .GSEnable = false); + } else { + urb_offset = 1; + urb_length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - urb_offset; + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), + .KernelStartPointer = pipeline->gs_kernel, + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY], + .PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base), + + .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, + .OutputTopology = gs_prog_data->output_topology, + .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, + .IncludeVertexHandles = gs_prog_data->base.include_vue_handles, + .DispatchGRFStartRegisterforURBData = + gs_prog_data->base.base.dispatch_grf_start_reg, + + .MaximumNumberofThreads = device->info.max_gs_threads - 1, + /* This in the next dword on HSW. 
*/ + .ControlDataFormat = gs_prog_data->control_data_format, + .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, + .InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1, + .DispatchMode = gs_prog_data->base.dispatch_mode, + .GSStatisticsEnable = true, + .IncludePrimitiveID = gs_prog_data->include_primitive_id, +# if (ANV_IS_HASWELL) + .ReorderMode = REORDER_TRAILING, +# else + .ReorderEnable = true, +# endif + .GSEnable = true); + } + + if (pipeline->ps_ksp0 == NO_KERNEL) { + anv_finishme("disabling ps"); + + /* FIXME: generated header doesn't emit attr swizzle fields */ + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SBE); + + /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). */ + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM, + .StatisticsEnable = true, + .ThreadDispatchEnable = false, + .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ + .LineAntialiasingRegionWidth = 1, /* 1.0 pixels */ + .EarlyDepthStencilControl = EDSC_NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT); + + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); + + } else { + const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 || + wm_prog_data->urb_setup[VARYING_SLOT_BFC1] != -1) + anv_finishme("two-sided color needs sbe swizzling setup"); + if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] != -1) + anv_finishme("primitive_id needs sbe swizzling setup"); + + /* FIXME: generated header doesn't emit attr swizzle fields */ + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SBE, + .NumberofSFOutputAttributes = pipeline->wm_prog_data.num_varying_inputs, + .VertexURBEntryReadLength = urb_length, + .VertexURBEntryReadOffset = urb_offset, + .PointSpriteTextureCoordinateOrigin = UPPERLEFT); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), + .KernelStartPointer0 = pipeline->ps_ksp0, + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT], + .PerThreadScratchSpace = scratch_space(&wm_prog_data->base), + + .MaximumNumberofThreads = device->info.max_wm_threads - 1, + .PushConstantEnable = wm_prog_data->base.nr_params > 0, + .AttributeEnable = wm_prog_data->num_varying_inputs > 0, + .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, + + .RenderTargetFastClearEnable = false, + .DualSourceBlendEnable = false, + .RenderTargetResolveEnable = false, + + .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? + POSOFFSET_SAMPLE : POSOFFSET_NONE, + + ._32PixelDispatchEnable = false, + ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, + ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, + + .DispatchGRFStartRegisterforConstantSetupData0 = pipeline->ps_grf_start0, + .DispatchGRFStartRegisterforConstantSetupData1 = 0, + .DispatchGRFStartRegisterforConstantSetupData2 = pipeline->ps_grf_start2, + +#if 0 + /* Haswell requires the sample mask to be set in this packet as well as + * in 3DSTATE_SAMPLE_MASK; the values should match. */ + /* _NEW_BUFFERS, _NEW_MULTISAMPLE */ +#endif + + .KernelStartPointer1 = 0, + .KernelStartPointer2 = pipeline->ps_ksp2); + + /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). 
*/ + anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM, + .StatisticsEnable = true, + .ThreadDispatchEnable = true, + .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ + .LineAntialiasingRegionWidth = 1, /* 1.0 pixels */ + .EarlyDepthStencilControl = EDSC_NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT, + .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, + .PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth, + .PixelShaderUsesSourceW = wm_prog_data->uses_src_w, + .PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask, + .BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes); + } + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/gen7_state.c b/src/intel/vulkan/gen7_state.c new file mode 100644 index 0000000..77bdb75 --- /dev/null +++ b/src/intel/vulkan/gen7_state.c @@ -0,0 +1,264 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#include "genxml/gen7_pack.h" +#include "genxml/gen75_pack.h" + +#include "genX_state_util.h" + +VkResult +genX(init_device_state)(struct anv_device *device) +{ + struct anv_batch batch; + + uint32_t cmds[64]; + batch.start = batch.next = cmds; + batch.end = (void *) cmds + sizeof(cmds); + + anv_batch_emit(&batch, GEN7_PIPELINE_SELECT, + .PipelineSelection = _3D); + + anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS), + .StatisticsEnable = true); + anv_batch_emit(&batch, GENX(3DSTATE_HS), .Enable = false); + anv_batch_emit(&batch, GENX(3DSTATE_TE), .TEEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_DS), .DSFunctionEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); + anv_batch_emit(&batch, GEN7_3DSTATE_AA_LINE_PARAMETERS); + anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END)); + + assert(batch.next <= batch.end); + + return anv_device_submit_simple_batch(device, &batch); +} + +GENX_FUNC(GEN7, GEN75) void +genX(fill_buffer_surface_state)(void *state, enum isl_format format, + uint32_t offset, uint32_t range, + uint32_t stride) +{ + uint32_t num_elements = range / stride; + + struct GENX(RENDER_SURFACE_STATE) surface_state = { + .SurfaceType = SURFTYPE_BUFFER, + .SurfaceFormat = format, + .SurfaceVerticalAlignment = VALIGN_4, + .SurfaceHorizontalAlignment = HALIGN_4, + .TiledSurface = false, + .RenderCacheReadWriteMode = false, + .SurfaceObjectControlState = GENX(MOCS), + .Height = ((num_elements - 1) >> 7) & 0x3fff, + .Width = (num_elements - 1) & 0x7f, + .Depth = ((num_elements - 1) >> 21) & 0x3f, + .SurfacePitch = stride - 1, +# if (ANV_IS_HASWELL) + .ShaderChannelSelectRed = SCS_RED, + .ShaderChannelSelectGreen = SCS_GREEN, + .ShaderChannelSelectBlue = SCS_BLUE, + .ShaderChannelSelectAlpha = SCS_ALPHA, +# endif + .SurfaceBaseAddress = { NULL, offset }, + }; + + GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state); +} + +VkResult genX(CreateSampler)( + VkDevice _device, + const VkSamplerCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSampler* pSampler) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_sampler *sampler; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); + + sampler = anv_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!sampler) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct GEN7_SAMPLER_STATE sampler_state = { + .SamplerDisable = false, + .TextureBorderColorMode = DX10OGL, + .LODPreClampEnable = CLAMP_ENABLE_OGL, + .BaseMipLevel = 0.0, + .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], + .MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter, + pCreateInfo->anisotropyEnable), + .MinModeFilter = vk_to_gen_tex_filter(pCreateInfo->minFilter, + pCreateInfo->anisotropyEnable), + .TextureLODBias = pCreateInfo->mipLodBias * 256, + .AnisotropicAlgorithm = EWAApproximation, + .MinLOD = pCreateInfo->minLod, + .MaxLOD = pCreateInfo->maxLod, + .ChromaKeyEnable = 0, + .ChromaKeyIndex = 0, + .ChromaKeyMode = 0, + .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], + .CubeSurfaceControlMode = OVERRIDE, + + .BorderColorPointer = + device->border_colors.offset + + pCreateInfo->borderColor * sizeof(float) * 4, + + .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy), + .RAddressMinFilterRoundingEnable = 0, + .RAddressMagFilterRoundingEnable = 0, + 
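+      /* Note that TextureLODBias above is the Vulkan float bias converted to
+       * the hardware's fixed-point encoding with 8 fractional bits (hence
+       * the multiply by 256); e.g. a mipLodBias of 0.5f packs as 128.
+       */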
.VAddressMinFilterRoundingEnable = 0, + .VAddressMagFilterRoundingEnable = 0, + .UAddressMinFilterRoundingEnable = 0, + .UAddressMagFilterRoundingEnable = 0, + .TrilinearFilterQuality = 0, + .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates, + .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], + .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], + .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], + }; + + GEN7_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); + + *pSampler = anv_sampler_to_handle(sampler); + + return VK_SUCCESS; +} + +static const uint8_t anv_halign[] = { + [4] = HALIGN_4, + [8] = HALIGN_8, +}; + +static const uint8_t anv_valign[] = { + [2] = VALIGN_2, + [4] = VALIGN_4, +}; + +void +genX(fill_image_surface_state)(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage) +{ + if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) + anv_finishme("non-2D image views"); + + assert(usage & (VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)); + assert(util_is_power_of_two(usage)); + + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + bool is_storage = (usage == VK_IMAGE_USAGE_STORAGE_BIT); + struct anv_surface *surface = + anv_image_get_surface_for_aspect_mask(image, range->aspectMask); + + uint32_t depth = 1; + if (range->layerCount > 1) { + depth = range->layerCount; + } else if (image->extent.depth > 1) { + depth = image->extent.depth; + } + + const struct isl_extent3d image_align_sa = + isl_surf_get_image_alignment_sa(&surface->isl); + + struct GENX(RENDER_SURFACE_STATE) template = { + .SurfaceType = anv_surftype(image, pCreateInfo->viewType, + usage == VK_IMAGE_USAGE_STORAGE_BIT), + .SurfaceArray = image->array_size > 1, + .SurfaceFormat = anv_surface_format(device, iview->format, is_storage), + .SurfaceVerticalAlignment = anv_valign[image_align_sa.height], + .SurfaceHorizontalAlignment = anv_halign[image_align_sa.width], + + /* From bspec (DevSNB, DevIVB): "Set Tile Walk to TILEWALK_XMAJOR if + * Tiled Surface is False." + */ + .TiledSurface = surface->isl.tiling != ISL_TILING_LINEAR, + .TileWalk = surface->isl.tiling == ISL_TILING_Y0 ? + TILEWALK_YMAJOR : TILEWALK_XMAJOR, + + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + + .RenderCacheReadWriteMode = 0, /* TEMPLATE */ + + .Height = image->extent.height - 1, + .Width = image->extent.width - 1, + .Depth = depth - 1, + .SurfacePitch = surface->isl.row_pitch - 1, + .MinimumArrayElement = range->baseArrayLayer, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + + .SurfaceObjectControlState = GENX(MOCS), + + .MIPCountLOD = 0, /* TEMPLATE */ + .SurfaceMinLOD = 0, /* TEMPLATE */ + + .MCSEnable = false, +# if (ANV_IS_HASWELL) + .ShaderChannelSelectRed = vk_to_gen_swizzle[iview->swizzle.r], + .ShaderChannelSelectGreen = vk_to_gen_swizzle[iview->swizzle.g], + .ShaderChannelSelectBlue = vk_to_gen_swizzle[iview->swizzle.b], + .ShaderChannelSelectAlpha = vk_to_gen_swizzle[iview->swizzle.a], +# else /* XXX: Seriously? 
*/ + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, +# endif + .ResourceMinLOD = 0.0, + .SurfaceBaseAddress = { NULL, iview->offset }, + }; + + if (usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + /* For render target surfaces, the hardware interprets field + * MIPCount/LOD as LOD. The Broadwell PRM says: + * + * MIPCountLOD defines the LOD that will be rendered into. + * SurfaceMinLOD is ignored. + */ + template.MIPCountLOD = range->baseMipLevel; + template.SurfaceMinLOD = 0; + } else { + /* For non render target surfaces, the hardware interprets field + * MIPCount/LOD as MIPCount. The range of levels accessible by the + * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. + */ + template.SurfaceMinLOD = range->baseMipLevel; + template.MIPCountLOD = MAX2(range->levelCount, 1) - 1; + } + + GENX(RENDER_SURFACE_STATE_pack)(NULL, state_map, &template); +} diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c new file mode 100644 index 0000000..b741612 --- /dev/null +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -0,0 +1,914 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#include "genxml/gen8_pack.h" +#include "genxml/gen9_pack.h" + +static uint32_t +cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) +{ + static const uint32_t push_constant_opcodes[] = { + [MESA_SHADER_VERTEX] = 21, + [MESA_SHADER_TESS_CTRL] = 25, /* HS */ + [MESA_SHADER_TESS_EVAL] = 26, /* DS */ + [MESA_SHADER_GEOMETRY] = 22, + [MESA_SHADER_FRAGMENT] = 23, + [MESA_SHADER_COMPUTE] = 0, + }; + + VkShaderStageFlags flushed = 0; + + anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { + if (stage == MESA_SHADER_COMPUTE) + continue; + + struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); + + if (state.offset == 0) + continue; + + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), + ._3DCommandSubOpcode = push_constant_opcodes[stage], + .ConstantBody = { + .PointerToConstantBuffer2 = { &cmd_buffer->device->dynamic_state_block_pool.bo, state.offset }, + .ConstantBuffer2ReadLength = DIV_ROUND_UP(state.alloc_size, 32), + }); + + flushed |= mesa_to_vk_shader_stage(stage); + } + + cmd_buffer->state.push_constants_dirty &= ~flushed; + + return flushed; +} + +#if ANV_GEN == 8 +static void +emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t count, const VkViewport *viewports) +{ + struct anv_state sf_clip_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64); + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); + + for (uint32_t i = 0; i < count; i++) { + const VkViewport *vp = &viewports[i]; + + /* The gen7 state struct has just the matrix and guardband fields, the + * gen8 struct adds the min/max viewport fields. */ + struct GENX(SF_CLIP_VIEWPORT) sf_clip_viewport = { + .ViewportMatrixElementm00 = vp->width / 2, + .ViewportMatrixElementm11 = vp->height / 2, + .ViewportMatrixElementm22 = 1.0, + .ViewportMatrixElementm30 = vp->x + vp->width / 2, + .ViewportMatrixElementm31 = vp->y + vp->height / 2, + .ViewportMatrixElementm32 = 0.0, + .XMinClipGuardband = -1.0f, + .XMaxClipGuardband = 1.0f, + .YMinClipGuardband = -1.0f, + .YMaxClipGuardband = 1.0f, + .XMinViewPort = vp->x, + .XMaxViewPort = vp->x + vp->width - 1, + .YMinViewPort = vp->y, + .YMaxViewPort = vp->y + vp->height - 1, + }; + + struct GENX(CC_VIEWPORT) cc_viewport = { + .MinimumDepth = vp->minDepth, + .MaximumDepth = vp->maxDepth + }; + + GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, + &sf_clip_viewport); + GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport); + } + + if (!cmd_buffer->device->info.has_llc) { + anv_state_clflush(sf_clip_state); + anv_state_clflush(cc_state); + } + + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), + .CCViewportPointer = cc_state.offset); + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), + .SFClipViewportPointer = sf_clip_state.offset); +} + +void +gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->state.dynamic.viewport.count > 0) { + emit_viewport_state(cmd_buffer, cmd_buffer->state.dynamic.viewport.count, + cmd_buffer->state.dynamic.viewport.viewports); + } else { + /* If viewport count is 0, this is taken to mean "use the default" */ + emit_viewport_state(cmd_buffer, 1, + &(VkViewport) { + .x = 0.0f, + .y = 0.0f, + .width = cmd_buffer->state.framebuffer->width, + .height = cmd_buffer->state.framebuffer->height, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }); 
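+      /* For illustration, emit_viewport_state() above packs the usual
+       * NDC-to-window mapping into SF_CLIP_VIEWPORT, i.e.
+       *
+       *    x_window = m00 * x_ndc + m30
+       *    y_window = m11 * y_ndc + m31
+       *
+       * so for a 1920x1080 framebuffer this default viewport gives
+       * m00 = 960, m30 = 960, m11 = 540, m31 = 540, mapping x_ndc = -1 to 0
+       * and x_ndc = +1 to 1920.  The depth range is not folded into the
+       * matrix here (m22 = 1.0, m32 = 0.0); minDepth/maxDepth are written
+       * to CC_VIEWPORT instead.
+       */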
+ } +} +#endif + +static void +emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), + .RegisterOffset = reg, + .DataDWord = imm); +} + +#define GEN8_L3CNTLREG 0x7034 + +static void +config_l3(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) +{ + /* References for GL state: + * + * - commits e307cfa..228d5a3 + * - src/mesa/drivers/dri/i965/gen7_l3_state.c + */ + + uint32_t val = enable_slm ? + /* All = 48 ways; URB = 16 ways; DC and RO = 0, SLM = 1 */ + 0x60000021 : + /* All = 48 ways; URB = 48 ways; DC, RO and SLM = 0 */ + 0x60000060; + bool changed = cmd_buffer->state.current_l3_config != val; + + if (changed) { + /* According to the hardware docs, the L3 partitioning can only be changed + * while the pipeline is completely drained and the caches are flushed, + * which involves a first PIPE_CONTROL flush which stalls the pipeline and + * initiates invalidation of the relevant caches... + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .TextureCacheInvalidationEnable = true, + .ConstantCacheInvalidationEnable = true, + .InstructionCacheInvalidateEnable = true, + .DCFlushEnable = true, + .PostSyncOperation = NoWrite, + .CommandStreamerStallEnable = true); + + /* ...followed by a second stalling flush which guarantees that + * invalidation is complete when the L3 configuration registers are + * modified. + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DCFlushEnable = true, + .PostSyncOperation = NoWrite, + .CommandStreamerStallEnable = true); + + emit_lri(&cmd_buffer->batch, GEN8_L3CNTLREG, val); + cmd_buffer->state.current_l3_config = val; + } +} + +static void +__emit_genx_sf_state(struct anv_cmd_buffer *cmd_buffer) +{ + uint32_t sf_dw[GENX(3DSTATE_SF_length)]; + struct GENX(3DSTATE_SF) sf = { + GENX(3DSTATE_SF_header), + .LineWidth = cmd_buffer->state.dynamic.line_width, + }; + GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf); + /* FIXME: gen9.fs */ + anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, + cmd_buffer->state.pipeline->gen8.sf); +} +static void +__emit_gen9_sf_state(struct anv_cmd_buffer *cmd_buffer) +{ + uint32_t sf_dw[GENX(3DSTATE_SF_length)]; + struct GEN9_3DSTATE_SF sf = { + GEN9_3DSTATE_SF_header, + .LineWidth = cmd_buffer->state.dynamic.line_width, + }; + GEN9_3DSTATE_SF_pack(NULL, sf_dw, &sf); + /* FIXME: gen9.fs */ + anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, + cmd_buffer->state.pipeline->gen8.sf); +} + +static void +__emit_sf_state(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->device->info.is_cherryview) + __emit_gen9_sf_state(cmd_buffer); + else + __emit_genx_sf_state(cmd_buffer); +} + +void +genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + uint32_t *p; + + uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; + + assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); + + config_l3(cmd_buffer, false); + + genX(flush_pipeline_select_3d)(cmd_buffer); + + if (vb_emit) { + const uint32_t num_buffers = __builtin_popcount(vb_emit); + const uint32_t num_dwords = 1 + num_buffers * 4; + + p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, + GENX(3DSTATE_VERTEX_BUFFERS)); + uint32_t vb, i = 0; + for_each_bit(vb, vb_emit) { + struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; + uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; + + struct GENX(VERTEX_BUFFER_STATE) state = { + .VertexBufferIndex = vb, + .MemoryObjectControlState = 
GENX(MOCS), + .AddressModifyEnable = true, + .BufferPitch = pipeline->binding_stride[vb], + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset + }; + + GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, &p[1 + i * 4], &state); + i++; + } + } + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { + /* If somebody compiled a pipeline after starting a command buffer the + * scratch bo may have grown since we started this cmd buffer (and + * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, + * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ + if (cmd_buffer->state.scratch_size < pipeline->total_scratch) + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + } + + /* We emit the binding tables and sampler tables first, then emit push + * constants and then finally emit binding table and sampler table + * pointers. It has to happen in this order, since emitting the binding + * tables may change the push constants (in case of storage images). After + * emitting push constants, on SKL+ we have to emit the corresponding + * 3DSTATE_BINDING_TABLE_POINTER_* for the push constants to take effect. + */ + uint32_t dirty = 0; + if (cmd_buffer->state.descriptors_dirty) + dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); + + if (cmd_buffer->state.push_constants_dirty) + dirty |= cmd_buffer_flush_push_constants(cmd_buffer); + + if (dirty) + gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) + gen8_cmd_buffer_emit_viewport(cmd_buffer); + + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) + gen7_cmd_buffer_emit_scissor(cmd_buffer); + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) { + __emit_sf_state(cmd_buffer); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)){ + bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || + cmd_buffer->state.dynamic.depth_bias.slope != 0.0f; + + uint32_t raster_dw[GENX(3DSTATE_RASTER_length)]; + struct GENX(3DSTATE_RASTER) raster = { + GENX(3DSTATE_RASTER_header), + .GlobalDepthOffsetEnableSolid = enable_bias, + .GlobalDepthOffsetEnableWireframe = enable_bias, + .GlobalDepthOffsetEnablePoint = enable_bias, + .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias, + .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope, + .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp + }; + GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster); + anv_batch_emit_merge(&cmd_buffer->batch, raster_dw, + pipeline->gen8.raster); + } + + /* Stencil reference values moved from COLOR_CALC_STATE in gen8 to + * 3DSTATE_WM_DEPTH_STENCIL in gen9. That means the dirty bits gets split + * across different state packets for gen8 and gen9. We handle that by + * using a big old #if switch here. 
+ */ +#if ANV_GEN == 8 + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + GEN8_COLOR_CALC_STATE_length * 4, + 64); + struct GEN8_COLOR_CALC_STATE cc = { + .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], + .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], + .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], + .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], + .StencilReferenceValue = + cmd_buffer->state.dynamic.stencil_reference.front, + .BackFaceStencilReferenceValue = + cmd_buffer->state.dynamic.stencil_reference.back, + }; + GEN8_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(cc_state); + + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = cc_state.offset, + .ColorCalcStatePointerValid = true); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { + uint32_t wm_depth_stencil_dw[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; + + struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + GEN8_3DSTATE_WM_DEPTH_STENCIL_header, + + /* Is this what we need to do? */ + .StencilBufferWriteEnable = + cmd_buffer->state.dynamic.stencil_write_mask.front != 0, + + .StencilTestMask = + cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff, + .StencilWriteMask = + cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff, + + .BackfaceStencilTestMask = + cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = + cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, + }; + GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, wm_depth_stencil_dw, + &wm_depth_stencil); + + anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw, + pipeline->gen8.wm_depth_stencil); + } +#else + if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) { + struct anv_state cc_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + GEN9_COLOR_CALC_STATE_length * 4, + 64); + struct GEN9_COLOR_CALC_STATE cc = { + .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], + .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], + .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], + .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], + }; + GEN9_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(cc_state); + + anv_batch_emit(&cmd_buffer->batch, + GEN9_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = cc_state.offset, + .ColorCalcStatePointerValid = true); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | + ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { + uint32_t dwords[GEN9_3DSTATE_WM_DEPTH_STENCIL_length]; + struct anv_dynamic_state *d = &cmd_buffer->state.dynamic; + struct GEN9_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + GEN9_3DSTATE_WM_DEPTH_STENCIL_header, + + .StencilBufferWriteEnable = d->stencil_write_mask.front != 0 || + d->stencil_write_mask.back != 0, + + .StencilTestMask = d->stencil_compare_mask.front & 0xff, + .StencilWriteMask = d->stencil_write_mask.front & 0xff, + + 
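+         /* VkStencilOpState exposes 32-bit compare and write masks, but only
+          * the low 8 bits are significant for these packet fields, hence the
+          * & 0xff applied to both the front- and back-face values here.
+          */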
.BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff, + .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff, + + .StencilReferenceValue = d->stencil_reference.front, + .BackfaceStencilReferenceValue = d->stencil_reference.back + }; + GEN9_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, dwords, &wm_depth_stencil); + + anv_batch_emit_merge(&cmd_buffer->batch, dwords, + pipeline->gen9.wm_depth_stencil); + } +#endif + + if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | + ANV_CMD_DIRTY_INDEX_BUFFER)) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), + .IndexedDrawCutIndexEnable = pipeline->primitive_restart, + .CutIndex = cmd_buffer->state.restart_index, + ); + } + + cmd_buffer->state.vb_dirty &= ~vb_emit; + cmd_buffer->state.dirty = 0; +} + +void genX(CmdBindIndexBuffer)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + static const uint32_t vk_to_gen_index_type[] = { + [VK_INDEX_TYPE_UINT16] = INDEX_WORD, + [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, + }; + + static const uint32_t restart_index_for_type[] = { + [VK_INDEX_TYPE_UINT16] = UINT16_MAX, + [VK_INDEX_TYPE_UINT32] = UINT32_MAX, + }; + + cmd_buffer->state.restart_index = restart_index_for_type[indexType]; + + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), + .IndexFormat = vk_to_gen_index_type[indexType], + .MemoryObjectControlState = GENX(MOCS), + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset); + + cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; +} + +static VkResult +flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = anv_cmd_buffer_emit_samplers(cmd_buffer, + MESA_SHADER_COMPUTE, &samplers); + if (result != VK_SUCCESS) + return result; + result = anv_cmd_buffer_emit_binding_table(cmd_buffer, + MESA_SHADER_COMPUTE, &surfaces); + if (result != VK_SUCCESS) + return result; + + struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); + + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; + + unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; + unsigned push_constant_data_size = + (prog_data->nr_params + local_id_dwords) * 4; + unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); + unsigned push_constant_regs = reg_aligned_constant_size / 32; + + if (push_state.alloc_size) { + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), + .CURBETotalDataLength = push_state.alloc_size, + .CURBEDataStartAddress = push_state.offset); + } + + assert(prog_data->total_shared <= 64 * 1024); + uint32_t slm_size = 0; + if (prog_data->total_shared > 0) { + /* slm_size is in 4k increments, but must be a power of 2. 
*/ + slm_size = 4 * 1024; + while (slm_size < prog_data->total_shared) + slm_size <<= 1; + slm_size /= 4 * 1024; + } + + struct anv_state state = + anv_state_pool_emit(&device->dynamic_state_pool, + GENX(INTERFACE_DESCRIPTOR_DATA), 64, + .KernelStartPointer = pipeline->cs_simd, + .KernelStartPointerHigh = 0, + .BindingTablePointer = surfaces.offset, + .BindingTableEntryCount = 0, + .SamplerStatePointer = samplers.offset, + .SamplerCount = 0, + .ConstantIndirectURBEntryReadLength = push_constant_regs, + .ConstantURBEntryReadOffset = 0, + .BarrierEnable = cs_prog_data->uses_barrier, + .SharedLocalMemorySize = slm_size, + .NumberofThreadsinGPGPUThreadGroup = + pipeline->cs_thread_width_max); + + uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t); + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), + .InterfaceDescriptorTotalLength = size, + .InterfaceDescriptorDataStartAddress = state.offset); + + return VK_SUCCESS; +} + +void +genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + VkResult result; + + assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + + bool needs_slm = pipeline->cs_prog_data.base.total_shared > 0; + config_l3(cmd_buffer, needs_slm); + + if (cmd_buffer->state.current_pipeline != GPGPU) { +#if ANV_GEN < 10 + /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: + * + * Software must clear the COLOR_CALC_STATE Valid field in + * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT + * with Pipeline Select set to GPGPU. + * + * The internal hardware docs recommend the same workaround for Gen9 + * hardware too. + */ + anv_batch_emit(&cmd_buffer->batch, + GENX(3DSTATE_CC_STATE_POINTERS)); +#endif + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), +#if ANV_GEN >= 9 + .MaskBits = 3, +#endif + .PipelineSelection = GPGPU); + cmd_buffer->state.current_pipeline = GPGPU; + } + + if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || + (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { + result = flush_compute_descriptor_set(cmd_buffer); + assert(result == VK_SUCCESS); + cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; + } + + cmd_buffer->state.compute_dirty = 0; +} + +static void +emit_ps_depth_count(struct anv_batch *batch, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WritePSDepthCount, + .DepthStallEnable = true, + .Address = { bo, offset }); +} + +static void +emit_query_availability(struct anv_batch *batch, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteImmediateData, + .Address = { bo, offset }, + .ImmediateData = 1); +} + +void genX(CmdBeginQuery)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query, + VkQueryControlFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + /* Workaround: When meta uses the pipeline with the VS disabled, it seems + * that the pipelining of the depth write breaks. What we see is that + * samples from the render pass clear leaks into the first query + * immediately after the clear. 
Doing a pipecontrol with a post-sync + * operation and DepthStallEnable seems to work around the issue. + */ + if (cmd_buffer->state.need_query_wa) { + cmd_buffer->state.need_query_wa = false; + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DepthCacheFlushEnable = true, + .DepthStallEnable = true); + } + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + query * sizeof(struct anv_query_pool_slot)); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +void genX(CmdEndQuery)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + query * sizeof(struct anv_query_pool_slot) + 8); + + emit_query_availability(&cmd_buffer->batch, &pool->bo, + query * sizeof(struct anv_query_pool_slot) + 16); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +#define TIMESTAMP 0x2358 + +void genX(CmdWriteTimestamp)( + VkCommandBuffer commandBuffer, + VkPipelineStageFlagBits pipelineStage, + VkQueryPool queryPool, + uint32_t query) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + uint32_t offset = query * sizeof(struct anv_query_pool_slot); + + assert(pool->type == VK_QUERY_TYPE_TIMESTAMP); + + switch (pipelineStage) { + case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT: + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = TIMESTAMP, + .MemoryAddress = { &pool->bo, offset }); + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = TIMESTAMP + 4, + .MemoryAddress = { &pool->bo, offset + 4 }); + break; + + default: + /* Everything else is bottom-of-pipe */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteTimestamp, + .Address = { &pool->bo, offset }); + break; + } + + emit_query_availability(&cmd_buffer->batch, &pool->bo, query + 16); +} + +#define alu_opcode(v) __gen_uint((v), 20, 31) +#define alu_operand1(v) __gen_uint((v), 10, 19) +#define alu_operand2(v) __gen_uint((v), 0, 9) +#define alu(opcode, operand1, operand2) \ + alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) + +#define OPCODE_NOOP 0x000 +#define OPCODE_LOAD 0x080 +#define OPCODE_LOADINV 0x480 +#define OPCODE_LOAD0 0x081 +#define OPCODE_LOAD1 0x481 +#define OPCODE_ADD 0x100 +#define OPCODE_SUB 0x101 +#define OPCODE_AND 0x102 +#define OPCODE_OR 0x103 +#define OPCODE_XOR 0x104 +#define OPCODE_STORE 0x180 +#define OPCODE_STOREINV 0x580 + +#define OPERAND_R0 0x00 +#define OPERAND_R1 0x01 +#define OPERAND_R2 0x02 +#define OPERAND_R3 0x03 +#define OPERAND_R4 0x04 +#define OPERAND_SRCA 0x20 +#define OPERAND_SRCB 0x21 +#define OPERAND_ACCU 0x31 +#define OPERAND_ZF 0x32 +#define OPERAND_CF 0x33 + +#define CS_GPR(n) (0x2600 + (n) * 8) + +static void +emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), + .RegisterAddress = reg + 4, + .MemoryAddress = { bo, offset + 4 }); +} + +static void +store_query_result(struct anv_batch *batch, uint32_t 
reg, + struct anv_bo *bo, uint32_t offset, VkQueryResultFlags flags) +{ + anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); + + if (flags & VK_QUERY_RESULT_64_BIT) + anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), + .RegisterAddress = reg + 4, + .MemoryAddress = { bo, offset + 4 }); +} + +void genX(CmdCopyQueryPoolResults)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize destStride, + VkQueryResultFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); + uint32_t slot_offset, dst_offset; + + if (flags & VK_QUERY_RESULT_WAIT_BIT) + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .CommandStreamerStallEnable = true, + .StallAtPixelScoreboard = true); + + dst_offset = buffer->offset + destOffset; + for (uint32_t i = 0; i < queryCount; i++) { + + slot_offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot); + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + emit_load_alu_reg_u64(&cmd_buffer->batch, + CS_GPR(0), &pool->bo, slot_offset); + emit_load_alu_reg_u64(&cmd_buffer->batch, + CS_GPR(1), &pool->bo, slot_offset + 8); + + /* FIXME: We need to clamp the result for 32 bit. */ + + uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH)); + dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); + dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); + dw[3] = alu(OPCODE_SUB, 0, 0); + dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); + break; + + case VK_QUERY_TYPE_TIMESTAMP: + emit_load_alu_reg_u64(&cmd_buffer->batch, + CS_GPR(2), &pool->bo, slot_offset); + break; + + default: + unreachable("unhandled query type"); + } + + store_query_result(&cmd_buffer->batch, + CS_GPR(2), buffer->bo, dst_offset, flags); + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), + &pool->bo, slot_offset + 16); + if (flags & VK_QUERY_RESULT_64_BIT) + store_query_result(&cmd_buffer->batch, + CS_GPR(0), buffer->bo, dst_offset + 8, flags); + else + store_query_result(&cmd_buffer->batch, + CS_GPR(0), buffer->bo, dst_offset + 4, flags); + } + + dst_offset += destStride; + } +} + +void genX(CmdSetEvent)( + VkCommandBuffer commandBuffer, + VkEvent _event, + VkPipelineStageFlags stageMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_event, event, _event); + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteImmediateData, + .Address = { + &cmd_buffer->device->dynamic_state_block_pool.bo, + event->state.offset + }, + .ImmediateData = VK_EVENT_SET); +} + +void genX(CmdResetEvent)( + VkCommandBuffer commandBuffer, + VkEvent _event, + VkPipelineStageFlags stageMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_event, event, _event); + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteImmediateData, + .Address = { + &cmd_buffer->device->dynamic_state_block_pool.bo, + event->state.offset + }, + .ImmediateData = VK_EVENT_RESET); +} + +void genX(CmdWaitEvents)( + VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags 
destStageMask, + uint32_t memoryBarrierCount, + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + for (uint32_t i = 0; i < eventCount; i++) { + ANV_FROM_HANDLE(anv_event, event, pEvents[i]); + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_SEMAPHORE_WAIT), + .WaitMode = PollingMode, + .CompareOperation = COMPARE_SAD_EQUAL_SDD, + .SemaphoreDataDword = VK_EVENT_SET, + .SemaphoreAddress = { + &cmd_buffer->device->dynamic_state_block_pool.bo, + event->state.offset + }); + } + + genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask, + false, /* byRegion */ + memoryBarrierCount, pMemoryBarriers, + bufferMemoryBarrierCount, pBufferMemoryBarriers, + imageMemoryBarrierCount, pImageMemoryBarriers); +} diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c new file mode 100644 index 0000000..f041156 --- /dev/null +++ b/src/intel/vulkan/gen8_pipeline.c @@ -0,0 +1,573 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#include "genxml/gen8_pack.h" +#include "genxml/gen9_pack.h" + +#include "genX_pipeline_util.h" + +static void +emit_ia_state(struct anv_pipeline *pipeline, + const VkPipelineInputAssemblyStateCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra) +{ + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_TOPOLOGY), + .PrimitiveTopologyType = pipeline->topology); +} + +static void +emit_rs_state(struct anv_pipeline *pipeline, + const VkPipelineRasterizationStateCreateInfo *info, + const VkPipelineMultisampleStateCreateInfo *ms_info, + const struct anv_graphics_pipeline_create_info *extra) +{ + uint32_t samples = 1; + + if (ms_info) + samples = ms_info->rasterizationSamples; + + struct GENX(3DSTATE_SF) sf = { + GENX(3DSTATE_SF_header), + .ViewportTransformEnable = !(extra && extra->disable_viewport), + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 0, + .PointWidthSource = pipeline->writes_point_size ? 
Vertex : State, + .PointWidth = 1.0, + }; + + /* FINISHME: VkBool32 rasterizerDiscardEnable; */ + + GENX(3DSTATE_SF_pack)(NULL, pipeline->gen8.sf, &sf); + + struct GENX(3DSTATE_RASTER) raster = { + GENX(3DSTATE_RASTER_header), + + /* For details on 3DSTATE_RASTER multisample state, see the BSpec table + * "Multisample Modes State". + */ + .DXMultisampleRasterizationEnable = samples > 1, + .ForcedSampleCount = FSC_NUMRASTSAMPLES_0, + .ForceMultisampling = false, + + .FrontWinding = vk_to_gen_front_face[info->frontFace], + .CullMode = vk_to_gen_cullmode[info->cullMode], + .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode], + .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], + .ScissorRectangleEnable = !(extra && extra->disable_scissor), +#if ANV_GEN == 8 + .ViewportZClipTestEnable = true, +#else + /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */ + .ViewportZFarClipTestEnable = true, + .ViewportZNearClipTestEnable = true, +#endif + }; + + GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gen8.raster, &raster); +} + +static void +emit_cb_state(struct anv_pipeline *pipeline, + const VkPipelineColorBlendStateCreateInfo *info, + const VkPipelineMultisampleStateCreateInfo *ms_info) +{ + struct anv_device *device = pipeline->device; + + uint32_t num_dwords = GENX(BLEND_STATE_length); + pipeline->blend_state = + anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); + + struct GENX(BLEND_STATE) blend_state = { + .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, + .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable, + }; + + for (uint32_t i = 0; i < info->attachmentCount; i++) { + const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[i]; + + if (a->srcColorBlendFactor != a->srcAlphaBlendFactor || + a->dstColorBlendFactor != a->dstAlphaBlendFactor || + a->colorBlendOp != a->alphaBlendOp) { + blend_state.IndependentAlphaBlendEnable = true; + } + + blend_state.Entry[i] = (struct GENX(BLEND_STATE_ENTRY)) { + .LogicOpEnable = info->logicOpEnable, + .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], + .ColorBufferBlendEnable = a->blendEnable, + .PreBlendSourceOnlyClampEnable = false, + .ColorClampRange = COLORCLAMP_RTFORMAT, + .PreBlendColorClampEnable = true, + .PostBlendColorClampEnable = true, + .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], + .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], + .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], + .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], + .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], + .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], + .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), + .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), + .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), + .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), + }; + + /* Our hardware applies the blend factor prior to the blend function + * regardless of what function is used. Technically, this means the + * hardware can do MORE than GL or Vulkan specify. However, it also + * means that, for MIN and MAX, we have to stomp the blend factor to + * ONE to make it a no-op. 
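+    * (Vulkan defines VK_BLEND_OP_MIN/MAX as min/max(src, dst) with the
+    * blend factors left out, so forcing both factors to ONE reproduces the
+    * required behavior.)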
+ */ + if (a->colorBlendOp == VK_BLEND_OP_MIN || + a->colorBlendOp == VK_BLEND_OP_MAX) { + blend_state.Entry[i].SourceBlendFactor = BLENDFACTOR_ONE; + blend_state.Entry[i].DestinationBlendFactor = BLENDFACTOR_ONE; + } + if (a->alphaBlendOp == VK_BLEND_OP_MIN || + a->alphaBlendOp == VK_BLEND_OP_MAX) { + blend_state.Entry[i].SourceAlphaBlendFactor = BLENDFACTOR_ONE; + blend_state.Entry[i].DestinationAlphaBlendFactor = BLENDFACTOR_ONE; + } + } + + for (uint32_t i = info->attachmentCount; i < 8; i++) { + blend_state.Entry[i].WriteDisableAlpha = true; + blend_state.Entry[i].WriteDisableRed = true; + blend_state.Entry[i].WriteDisableGreen = true; + blend_state.Entry[i].WriteDisableBlue = true; + } + + GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state); + if (!device->info.has_llc) + anv_state_clflush(pipeline->blend_state); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), + .BlendStatePointer = pipeline->blend_state.offset, + .BlendStatePointerValid = true); +} + +static void +emit_ds_state(struct anv_pipeline *pipeline, + const VkPipelineDepthStencilStateCreateInfo *info) +{ + uint32_t *dw = ANV_GEN == 8 ? + pipeline->gen8.wm_depth_stencil : pipeline->gen9.wm_depth_stencil; + + if (info == NULL) { + /* We're going to OR this together with the dynamic state. We need + * to make sure it's initialized to something useful. + */ + memset(pipeline->gen8.wm_depth_stencil, 0, + sizeof(pipeline->gen8.wm_depth_stencil)); + memset(pipeline->gen9.wm_depth_stencil, 0, + sizeof(pipeline->gen9.wm_depth_stencil)); + return; + } + + /* VkBool32 depthBoundsTestEnable; // optional (depth_bounds_test) */ + + struct GENX(3DSTATE_WM_DEPTH_STENCIL) wm_depth_stencil = { + .DepthTestEnable = info->depthTestEnable, + .DepthBufferWriteEnable = info->depthWriteEnable, + .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], + .DoubleSidedStencilEnable = true, + + .StencilTestEnable = info->stencilTestEnable, + .StencilFailOp = vk_to_gen_stencil_op[info->front.failOp], + .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp], + .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp], + .StencilTestFunction = vk_to_gen_compare_op[info->front.compareOp], + .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.failOp], + .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.passOp], + .BackfaceStencilPassDepthFailOp =vk_to_gen_stencil_op[info->back.depthFailOp], + .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp], + }; + + GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dw, &wm_depth_stencil); +} + +static void +emit_ms_state(struct anv_pipeline *pipeline, + const VkPipelineMultisampleStateCreateInfo *info) +{ + uint32_t samples = 1; + uint32_t log2_samples = 0; + + /* From the Vulkan 1.0 spec: + * If pSampleMask is NULL, it is treated as if the mask has all bits + * enabled, i.e. no coverage is removed from fragments. + * + * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits. + */ + uint32_t sample_mask = 0xffff; + + if (info) { + samples = info->rasterizationSamples; + log2_samples = __builtin_ffs(samples) - 1; + } + + if (info && info->pSampleMask) + sample_mask &= info->pSampleMask[0]; + + if (info && info->sampleShadingEnable) + anv_finishme("VkPipelineMultisampleStateCreateInfo::sampleShadingEnable"); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), + + /* The PRM says that this bit is valid only for DX9: + * + * SW can choose to set this bit only for DX9 API. 
DX10/OGL API's + * should not have any effect by setting or not setting this bit. + */ + .PixelPositionOffsetEnable = false, + + .PixelLocation = CENTER, + .NumberofMultisamples = log2_samples); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), + .SampleMask = sample_mask); +} + +VkResult +genX(graphics_pipeline_create)( + VkDevice _device, + struct anv_pipeline_cache * cache, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline *pipeline; + VkResult result; + uint32_t offset, length; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_pipeline_init(pipeline, device, cache, + pCreateInfo, extra, pAllocator); + if (result != VK_SUCCESS) { + anv_free2(&device->alloc, pAllocator, pipeline); + return result; + } + + assert(pCreateInfo->pVertexInputState); + emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra); + assert(pCreateInfo->pInputAssemblyState); + emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra); + assert(pCreateInfo->pRasterizationState); + emit_rs_state(pipeline, pCreateInfo->pRasterizationState, + pCreateInfo->pMultisampleState, extra); + emit_ms_state(pipeline, pCreateInfo->pMultisampleState); + emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); + emit_cb_state(pipeline, pCreateInfo->pColorBlendState, + pCreateInfo->pMultisampleState); + + emit_urb_setup(pipeline); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), + .ClipEnable = true, + .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), + .MinimumPointWidth = 0.125, + .MaximumPointWidth = 255.875, + .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), + .StatisticsEnable = true, + .LineEndCapAntialiasingRegionWidth = _05pixels, + .LineAntialiasingRegionWidth = _10pixels, + .EarlyDepthStencilControl = NORMAL, + .ForceThreadDispatchEnable = NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT, + .BarycentricInterpolationMode = + pipeline->ps_ksp0 == NO_KERNEL ? 
+ 0 : pipeline->wm_prog_data.barycentric_interp_modes); + + const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; + offset = 1; + length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; + + if (pipeline->gs_kernel == NO_KERNEL) + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .Enable = false); + else + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), + .SingleProgramFlow = false, + .KernelStartPointer = pipeline->gs_kernel, + .VectorMaskEnable = false, + .SamplerCount = 0, + .BindingTableEntryCount = 0, + .ExpectedVertexCount = gs_prog_data->vertices_in, + + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY], + .PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base), + + .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, + .OutputTopology = gs_prog_data->output_topology, + .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, + .IncludeVertexHandles = gs_prog_data->base.include_vue_handles, + .DispatchGRFStartRegisterForURBData = + gs_prog_data->base.base.dispatch_grf_start_reg, + + .MaximumNumberofThreads = device->info.max_gs_threads / 2 - 1, + .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, + .DispatchMode = gs_prog_data->base.dispatch_mode, + .StatisticsEnable = true, + .IncludePrimitiveID = gs_prog_data->include_primitive_id, + .ReorderMode = TRAILING, + .Enable = true, + + .ControlDataFormat = gs_prog_data->control_data_format, + + .StaticOutput = gs_prog_data->static_vertex_count >= 0, + .StaticOutputVertexCount = + gs_prog_data->static_vertex_count >= 0 ? + gs_prog_data->static_vertex_count : 0, + + /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled: + * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v) + * UserClipDistanceCullTestEnableBitmask(v) + */ + + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length); + + const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; + /* Skip the VUE header and position slots */ + offset = 1; + length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset; + + uint32_t vs_start = pipeline->vs_simd8 != NO_KERNEL ? pipeline->vs_simd8 : + pipeline->vs_vec4; + + if (vs_start == NO_KERNEL || (extra && extra->disable_vs)) + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), + .FunctionEnable = false, + /* Even if VS is disabled, SBE still gets the amount of + * vertex data to read from this field. 
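+             * These offset/length fields are in 256-bit units, i.e. pairs
+             * of 128-bit varying slots, so the offset of 1 computed above
+             * skips the VUE header and position slots.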
*/ + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length); + else + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), + .KernelStartPointer = vs_start, + .SingleVertexDispatch = false, + .VectorMaskEnable = false, + .SamplerCount = 0, + .BindingTableEntryCount = + vue_prog_data->base.binding_table.size_bytes / 4, + .ThreadDispatchPriority = false, + .FloatingPointMode = IEEE754, + .IllegalOpcodeExceptionEnable = false, + .AccessesUAV = false, + .SoftwareExceptionEnable = false, + + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_VERTEX], + .PerThreadScratchSpace = scratch_space(&vue_prog_data->base), + + .DispatchGRFStartRegisterForURBData = + vue_prog_data->base.dispatch_grf_start_reg, + .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + .VertexURBEntryReadOffset = 0, + + .MaximumNumberofThreads = device->info.max_vs_threads - 1, + .StatisticsEnable = false, + .SIMD8DispatchEnable = pipeline->vs_simd8 != NO_KERNEL, + .VertexCacheDisable = false, + .FunctionEnable = true, + + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length, + .UserClipDistanceClipTestEnableBitmask = 0, + .UserClipDistanceCullTestEnableBitmask = 0); + + const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + + const int num_thread_bias = ANV_GEN == 8 ? 2 : 1; + if (pipeline->ps_ksp0 == NO_KERNEL) { + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), + .PixelShaderValid = false); + } else { + /* TODO: We should clean this up. Among other things, this is mostly + * shared with other gens. + */ + const struct brw_vue_map *fs_input_map; + if (pipeline->gs_kernel == NO_KERNEL) + fs_input_map = &vue_prog_data->vue_map; + else + fs_input_map = &gs_prog_data->base.vue_map; + + struct GENX(3DSTATE_SBE_SWIZ) swiz = { + GENX(3DSTATE_SBE_SWIZ_header), + }; + + int max_source_attr = 0; + for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { + int input_index = wm_prog_data->urb_setup[attr]; + + if (input_index < 0) + continue; + + int source_attr = fs_input_map->varying_to_slot[attr]; + max_source_attr = MAX2(max_source_attr, source_attr); + + if (input_index >= 16) + continue; + + if (source_attr == -1) { + /* This attribute does not exist in the VUE--that means that the + * vertex shader did not write to it. It could be that it's a + * regular varying read by the fragment shader but not written by + * the vertex shader or it's gl_PrimitiveID. In the first case the + * value is undefined, in the second it needs to be + * gl_PrimitiveID. + */ + swiz.Attribute[input_index].ConstantSource = PRIM_ID; + swiz.Attribute[input_index].ComponentOverrideX = true; + swiz.Attribute[input_index].ComponentOverrideY = true; + swiz.Attribute[input_index].ComponentOverrideZ = true; + swiz.Attribute[input_index].ComponentOverrideW = true; + } else { + /* We have to subtract two slots to accout for the URB entry output + * read offset in the VS and GS stages. 
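+             * (That read offset is one 256-bit unit, i.e. the two slots
+             * holding the VUE header and position, which is why VUE slot N
+             * shows up here as SBE source attribute N - 2.)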
+ */ + swiz.Attribute[input_index].SourceAttribute = source_attr - 2; + } + } + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), + .AttributeSwizzleEnable = true, + .ForceVertexURBEntryReadLength = false, + .ForceVertexURBEntryReadOffset = false, + .VertexURBEntryReadLength = + DIV_ROUND_UP(max_source_attr + 1, 2), + .PointSpriteTextureCoordinateOrigin = UPPERLEFT, + .NumberofSFOutputAttributes = + wm_prog_data->num_varying_inputs, + +#if ANV_GEN >= 9 + .Attribute0ActiveComponentFormat = ACF_XYZW, + .Attribute1ActiveComponentFormat = ACF_XYZW, + .Attribute2ActiveComponentFormat = ACF_XYZW, + .Attribute3ActiveComponentFormat = ACF_XYZW, + .Attribute4ActiveComponentFormat = ACF_XYZW, + .Attribute5ActiveComponentFormat = ACF_XYZW, + .Attribute6ActiveComponentFormat = ACF_XYZW, + .Attribute7ActiveComponentFormat = ACF_XYZW, + .Attribute8ActiveComponentFormat = ACF_XYZW, + .Attribute9ActiveComponentFormat = ACF_XYZW, + .Attribute10ActiveComponentFormat = ACF_XYZW, + .Attribute11ActiveComponentFormat = ACF_XYZW, + .Attribute12ActiveComponentFormat = ACF_XYZW, + .Attribute13ActiveComponentFormat = ACF_XYZW, + .Attribute14ActiveComponentFormat = ACF_XYZW, + .Attribute15ActiveComponentFormat = ACF_XYZW, + /* wow, much field, very attribute */ + .Attribute16ActiveComponentFormat = ACF_XYZW, + .Attribute17ActiveComponentFormat = ACF_XYZW, + .Attribute18ActiveComponentFormat = ACF_XYZW, + .Attribute19ActiveComponentFormat = ACF_XYZW, + .Attribute20ActiveComponentFormat = ACF_XYZW, + .Attribute21ActiveComponentFormat = ACF_XYZW, + .Attribute22ActiveComponentFormat = ACF_XYZW, + .Attribute23ActiveComponentFormat = ACF_XYZW, + .Attribute24ActiveComponentFormat = ACF_XYZW, + .Attribute25ActiveComponentFormat = ACF_XYZW, + .Attribute26ActiveComponentFormat = ACF_XYZW, + .Attribute27ActiveComponentFormat = ACF_XYZW, + .Attribute28ActiveComponentFormat = ACF_XYZW, + .Attribute29ActiveComponentFormat = ACF_XYZW, + .Attribute28ActiveComponentFormat = ACF_XYZW, + .Attribute29ActiveComponentFormat = ACF_XYZW, + .Attribute30ActiveComponentFormat = ACF_XYZW, +#endif + ); + + uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch, + GENX(3DSTATE_SBE_SWIZ_length)); + GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), + .KernelStartPointer0 = pipeline->ps_ksp0, + + .SingleProgramFlow = false, + .VectorMaskEnable = true, + .SamplerCount = 1, + + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT], + .PerThreadScratchSpace = scratch_space(&wm_prog_data->base), + + .MaximumNumberofThreadsPerPSD = 64 - num_thread_bias, + .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? 
+ POSOFFSET_SAMPLE: POSOFFSET_NONE, + .PushConstantEnable = wm_prog_data->base.nr_params > 0, + ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, + ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, + ._32PixelDispatchEnable = false, + + .DispatchGRFStartRegisterForConstantSetupData0 = pipeline->ps_grf_start0, + .DispatchGRFStartRegisterForConstantSetupData1 = 0, + .DispatchGRFStartRegisterForConstantSetupData2 = pipeline->ps_grf_start2, + + .KernelStartPointer1 = 0, + .KernelStartPointer2 = pipeline->ps_ksp2); + + bool per_sample_ps = pCreateInfo->pMultisampleState && + pCreateInfo->pMultisampleState->sampleShadingEnable; + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), + .PixelShaderValid = true, + .PixelShaderKillsPixel = wm_prog_data->uses_kill, + .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, + .AttributeEnable = wm_prog_data->num_varying_inputs > 0, + .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, + .PixelShaderIsPerSample = per_sample_ps, + .PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth, + .PixelShaderUsesSourceW = wm_prog_data->uses_src_w, +#if ANV_GEN >= 9 + .PixelShaderPullsBary = wm_prog_data->pulls_bary, + .InputCoverageMaskState = wm_prog_data->uses_sample_mask ? + ICMS_INNER_CONSERVATIVE : ICMS_NONE, +#else + .PixelShaderUsesInputCoverageMask = + wm_prog_data->uses_sample_mask, +#endif + ); + } + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/gen8_state.c b/src/intel/vulkan/gen8_state.c new file mode 100644 index 0000000..04cfff5 --- /dev/null +++ b/src/intel/vulkan/gen8_state.c @@ -0,0 +1,493 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" + +#include "genxml/gen8_pack.h" +#include "genxml/gen9_pack.h" + +#include "genX_state_util.h" + +VkResult +genX(init_device_state)(struct anv_device *device) +{ + struct anv_batch batch; + + uint32_t cmds[64]; + batch.start = batch.next = cmds; + batch.end = (void *) cmds + sizeof(cmds); + + anv_batch_emit(&batch, GENX(PIPELINE_SELECT), +#if ANV_GEN >= 9 + .MaskBits = 3, +#endif + .PipelineSelection = _3D); + + anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS), + .StatisticsEnable = true); + anv_batch_emit(&batch, GENX(3DSTATE_HS), .Enable = false); + anv_batch_emit(&batch, GENX(3DSTATE_TE), .TEEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_DS), .FunctionEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY), + .ChromaKeyKillEnable = false); + anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS)); + + /* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and + * VkPhysicalDeviceFeatures::standardSampleLocations. + */ + anv_batch_emit(&batch, GENX(3DSTATE_SAMPLE_PATTERN), + ._1xSample0XOffset = 0.5, + ._1xSample0YOffset = 0.5, + ._2xSample0XOffset = 0.25, + ._2xSample0YOffset = 0.25, + ._2xSample1XOffset = 0.75, + ._2xSample1YOffset = 0.75, + ._4xSample0XOffset = 0.375, + ._4xSample0YOffset = 0.125, + ._4xSample1XOffset = 0.875, + ._4xSample1YOffset = 0.375, + ._4xSample2XOffset = 0.125, + ._4xSample2YOffset = 0.625, + ._4xSample3XOffset = 0.625, + ._4xSample3YOffset = 0.875, + ._8xSample0XOffset = 0.5625, + ._8xSample0YOffset = 0.3125, + ._8xSample1XOffset = 0.4375, + ._8xSample1YOffset = 0.6875, + ._8xSample2XOffset = 0.8125, + ._8xSample2YOffset = 0.5625, + ._8xSample3XOffset = 0.3125, + ._8xSample3YOffset = 0.1875, + ._8xSample4XOffset = 0.1875, + ._8xSample4YOffset = 0.8125, + ._8xSample5XOffset = 0.0625, + ._8xSample5YOffset = 0.4375, + ._8xSample6XOffset = 0.6875, + ._8xSample6YOffset = 0.9375, + ._8xSample7XOffset = 0.9375, + ._8xSample7YOffset = 0.0625, +#if ANV_GEN >= 9 + ._16xSample0XOffset = 0.5625, + ._16xSample0YOffset = 0.5625, + ._16xSample1XOffset = 0.4375, + ._16xSample1YOffset = 0.3125, + ._16xSample2XOffset = 0.3125, + ._16xSample2YOffset = 0.6250, + ._16xSample3XOffset = 0.7500, + ._16xSample3YOffset = 0.4375, + ._16xSample4XOffset = 0.1875, + ._16xSample4YOffset = 0.3750, + ._16xSample5XOffset = 0.6250, + ._16xSample5YOffset = 0.8125, + ._16xSample6XOffset = 0.8125, + ._16xSample6YOffset = 0.6875, + ._16xSample7XOffset = 0.6875, + ._16xSample7YOffset = 0.1875, + ._16xSample8XOffset = 0.3750, + ._16xSample8YOffset = 0.8750, + ._16xSample9XOffset = 0.5000, + ._16xSample9YOffset = 0.0625, + ._16xSample10XOffset = 0.2500, + ._16xSample10YOffset = 0.1250, + ._16xSample11XOffset = 0.1250, + ._16xSample11YOffset = 0.7500, + ._16xSample12XOffset = 0.0000, + ._16xSample12YOffset = 0.5000, + ._16xSample13XOffset = 0.9375, + ._16xSample13YOffset = 0.2500, + ._16xSample14XOffset = 0.8750, + ._16xSample14YOffset = 0.9375, + ._16xSample15XOffset = 0.0625, + ._16xSample15YOffset = 0.0000, +#endif + ); + + anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END)); + + assert(batch.next <= batch.end); + + return anv_device_submit_simple_batch(device, &batch); +} + +static const uint32_t +isl_to_gen_multisample_layout[] = { + [ISL_MSAA_LAYOUT_NONE] = MSS, + [ISL_MSAA_LAYOUT_INTERLEAVED] = DEPTH_STENCIL, + [ISL_MSAA_LAYOUT_ARRAY] = MSS, +}; + +void +genX(fill_buffer_surface_state)(void *state, 
enum isl_format format,
+                                uint32_t offset, uint32_t range, uint32_t stride)
+{
+   uint32_t num_elements = range / stride;
+
+   struct GENX(RENDER_SURFACE_STATE) surface_state = {
+      .SurfaceType = SURFTYPE_BUFFER,
+      .SurfaceArray = false,
+      .SurfaceFormat = format,
+      .SurfaceVerticalAlignment = VALIGN4,
+      .SurfaceHorizontalAlignment = HALIGN4,
+      .TileMode = LINEAR,
+      .SamplerL2BypassModeDisable = true,
+      .RenderCacheReadWriteMode = WriteOnlyCache,
+      .MemoryObjectControlState = GENX(MOCS),
+      .Height = ((num_elements - 1) >> 7) & 0x3fff,
+      .Width = (num_elements - 1) & 0x7f,
+      .Depth = ((num_elements - 1) >> 21) & 0x3f,
+      .SurfacePitch = stride - 1,
+      .NumberofMultisamples = MULTISAMPLECOUNT_1,
+      .ShaderChannelSelectRed = SCS_RED,
+      .ShaderChannelSelectGreen = SCS_GREEN,
+      .ShaderChannelSelectBlue = SCS_BLUE,
+      .ShaderChannelSelectAlpha = SCS_ALPHA,
+      /* FIXME: We assume that the image must be bound at this time. */
+      .SurfaceBaseAddress = { NULL, offset },
+   };
+
+   GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state);
+}
+
+static const uint8_t anv_halign[] = {
+    [4] = HALIGN4,
+    [8] = HALIGN8,
+    [16] = HALIGN16,
+};
+
+static const uint8_t anv_valign[] = {
+    [4] = VALIGN4,
+    [8] = VALIGN8,
+    [16] = VALIGN16,
+};
+
+/**
+ * Get the values to pack into RENDER_SURFACE_STATE.SurfaceHorizontalAlignment
+ * and SurfaceVerticalAlignment.
+ */
+static void
+get_halign_valign(const struct isl_surf *surf, uint32_t *halign, uint32_t *valign)
+{
+   #if ANV_GENx10 >= 90
+      if (isl_tiling_is_std_y(surf->tiling) ||
+          surf->dim_layout == ISL_DIM_LAYOUT_GEN9_1D) {
+         /* The hardware ignores the alignment values. Anyway, the surface's
+          * true alignment is likely outside the enum range of HALIGN* and
+          * VALIGN*.
+          */
+         *halign = 0;
+         *valign = 0;
+      } else {
+         /* In Skylake, RENDER_SURFACE_STATE.SurfaceVerticalAlignment is in units
+          * of surface elements (not pixels nor samples). For compressed formats,
+          * a "surface element" is defined as a compression block. For example,
+          * if SurfaceVerticalAlignment is VALIGN_4 and SurfaceFormat is an ETC2
+          * format (ETC2 has a block height of 4), then the vertical alignment is
+          * 4 compression blocks or, equivalently, 16 pixels.
+          */
+         struct isl_extent3d image_align_el
+            = isl_surf_get_image_alignment_el(surf);
+
+         *halign = anv_halign[image_align_el.width];
+         *valign = anv_valign[image_align_el.height];
+      }
+   #else
+      /* Pre-Skylake, RENDER_SURFACE_STATE.SurfaceVerticalAlignment is in
+       * units of surface samples. For example, if SurfaceVerticalAlignment
+       * is VALIGN_4 and the surface is singlesampled, then for any surface
+       * format (compressed or not) the vertical alignment is
+       * 4 pixels.
+       */
+      struct isl_extent3d image_align_sa
+         = isl_surf_get_image_alignment_sa(surf);
+
+      *halign = anv_halign[image_align_sa.width];
+      *valign = anv_valign[image_align_sa.height];
+   #endif
+}
+
+static uint32_t
+get_qpitch(const struct isl_surf *surf)
+{
+   switch (surf->dim) {
+   default:
+      unreachable(!"bad isl_surf_dim");
+   case ISL_SURF_DIM_1D:
+      #if ANV_GENx10 >= 90
+         /* QPitch is usually expressed as rows of surface elements (where
+          * a surface element is a compression block or a single surface
+          * sample). Skylake 1D is an outlier.
+          *
+          * From the Skylake BSpec >> Memory Views >> Common Surface
+          * Formats >> Surface Layout and Tiling >> 1D Surfaces:
+          *
+          *    Surface QPitch specifies the distance in pixels between array
+          *    slices.
+ */ + return isl_surf_get_array_pitch_el(surf); + #else + return isl_surf_get_array_pitch_el_rows(surf); + #endif + case ISL_SURF_DIM_2D: + case ISL_SURF_DIM_3D: + #if ANV_GEN >= 9 + return isl_surf_get_array_pitch_el_rows(surf); + #else + /* From the Broadwell PRM for RENDER_SURFACE_STATE.QPitch + * + * "This field must be set to an integer multiple of the Surface + * Vertical Alignment. For compressed textures (BC*, FXT1, + * ETC*, and EAC* Surface Formats), this field is in units of + * rows in the uncompressed surface, and must be set to an + * integer multiple of the vertical alignment parameter "j" + * defined in the Common Surface Formats section." + */ + return isl_surf_get_array_pitch_sa_rows(surf); + #endif + } +} + +void +genX(fill_image_surface_state)(struct anv_device *device, void *state_map, + struct anv_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + VkImageUsageFlagBits usage) +{ + assert(usage & (VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)); + assert(util_is_power_of_two(usage)); + + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + bool is_storage = (usage == VK_IMAGE_USAGE_STORAGE_BIT); + struct anv_surface *surface = + anv_image_get_surface_for_aspect_mask(image, range->aspectMask); + + static const uint8_t isl_to_gen_tiling[] = { + [ISL_TILING_LINEAR] = LINEAR, + [ISL_TILING_X] = XMAJOR, + [ISL_TILING_Y0] = YMAJOR, + [ISL_TILING_Yf] = YMAJOR, + [ISL_TILING_Ys] = YMAJOR, + [ISL_TILING_W] = WMAJOR, + }; + + uint32_t halign, valign; + get_halign_valign(&surface->isl, &halign, &valign); + + struct GENX(RENDER_SURFACE_STATE) template = { + .SurfaceType = anv_surftype(image, pCreateInfo->viewType, is_storage), + .SurfaceArray = image->array_size > 1, + .SurfaceFormat = anv_surface_format(device, iview->format, is_storage), + .SurfaceVerticalAlignment = valign, + .SurfaceHorizontalAlignment = halign, + .TileMode = isl_to_gen_tiling[surface->isl.tiling], + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, + .CubeFaceEnablePositiveZ = 1, + .CubeFaceEnableNegativeZ = 1, + .CubeFaceEnablePositiveY = 1, + .CubeFaceEnableNegativeY = 1, + .CubeFaceEnablePositiveX = 1, + .CubeFaceEnableNegativeX = 1, + .MemoryObjectControlState = GENX(MOCS), + + /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in + * RENDER_SURFACE_STATE. The Broadwell PRM says "it is illegal to have + * both Base Mip Level fields nonzero". 
+ */ + .BaseMipLevel = 0.0, + + .SurfaceQPitch = get_qpitch(&surface->isl) >> 2, + .Height = iview->level_0_extent.height - 1, + .Width = iview->level_0_extent.width - 1, + .Depth = 0, /* TEMPLATE */ + .SurfacePitch = surface->isl.row_pitch - 1, + .RenderTargetViewExtent = 0, /* TEMPLATE */ + .MinimumArrayElement = 0, /* TEMPLATE */ + .MultisampledSurfaceStorageFormat = + isl_to_gen_multisample_layout[surface->isl.msaa_layout], + .NumberofMultisamples = ffs(surface->isl.samples) - 1, + .MultisamplePositionPaletteIndex = 0, /* UNUSED */ + .XOffset = 0, + .YOffset = 0, + + .MIPCountLOD = 0, /* TEMPLATE */ + .SurfaceMinLOD = 0, /* TEMPLATE */ + + .AuxiliarySurfaceMode = AUX_NONE, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ShaderChannelSelectRed = vk_to_gen_swizzle[iview->swizzle.r], + .ShaderChannelSelectGreen = vk_to_gen_swizzle[iview->swizzle.g], + .ShaderChannelSelectBlue = vk_to_gen_swizzle[iview->swizzle.b], + .ShaderChannelSelectAlpha = vk_to_gen_swizzle[iview->swizzle.a], + .ResourceMinLOD = 0.0, + .SurfaceBaseAddress = { NULL, iview->offset }, + }; + + switch (template.SurfaceType) { + case SURFTYPE_1D: + case SURFTYPE_2D: + template.MinimumArrayElement = range->baseArrayLayer; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: + * + * For SURFTYPE_1D, 2D, and CUBE: The range of this field is reduced + * by one for each increase from zero of Minimum Array Element. For + * example, if Minimum Array Element is set to 1024 on a 2D surface, + * the range of this field is reduced to [0,1023]. + * + * In other words, 'Depth' is the number of array layers. + */ + template.Depth = range->layerCount - 1; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: + * + * For Render Target and Typed Dataport 1D and 2D Surfaces: + * This field must be set to the same value as the Depth field. + */ + template.RenderTargetViewExtent = template.Depth; + break; + case SURFTYPE_CUBE: + template.MinimumArrayElement = range->baseArrayLayer; + /* Same as SURFTYPE_2D, but divided by 6 */ + template.Depth = range->layerCount / 6 - 1; + template.RenderTargetViewExtent = template.Depth; + break; + case SURFTYPE_3D: + template.MinimumArrayElement = range->baseArrayLayer; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: + * + * If the volume texture is MIP-mapped, this field specifies the + * depth of the base MIP level. + */ + template.Depth = image->extent.depth - 1; + + /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: + * + * For Render Target and Typed Dataport 3D Surfaces: This field + * indicates the extent of the accessible 'R' coordinates minus 1 on + * the LOD currently being rendered to. + */ + template.RenderTargetViewExtent = iview->extent.depth - 1; + break; + default: + unreachable(!"bad SurfaceType"); + } + + if (usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + /* For render target surfaces, the hardware interprets field + * MIPCount/LOD as LOD. The Broadwell PRM says: + * + * MIPCountLOD defines the LOD that will be rendered into. + * SurfaceMinLOD is ignored. + */ + template.MIPCountLOD = range->baseMipLevel; + template.SurfaceMinLOD = 0; + } else { + /* For non render target surfaces, the hardware interprets field + * MIPCount/LOD as MIPCount. The range of levels accessible by the + * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. 
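+       * For example, a view with baseMipLevel = 2 and levelCount = 4
+       * programs SurfaceMinLOD = 2 and MIPCountLOD = 3, making mip levels
+       * 2..5 visible to the sampler.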
+ */ + template.SurfaceMinLOD = range->baseMipLevel; + template.MIPCountLOD = MAX2(range->levelCount, 1) - 1; + } + + GENX(RENDER_SURFACE_STATE_pack)(NULL, state_map, &template); +} + +VkResult genX(CreateSampler)( + VkDevice _device, + const VkSamplerCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSampler* pSampler) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_sampler *sampler; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); + + sampler = anv_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!sampler) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + uint32_t border_color_offset = device->border_colors.offset + + pCreateInfo->borderColor * 64; + + struct GENX(SAMPLER_STATE) sampler_state = { + .SamplerDisable = false, + .TextureBorderColorMode = DX10OGL, + .LODPreClampMode = CLAMP_MODE_OGL, +#if ANV_GEN == 8 + .BaseMipLevel = 0.0, +#endif + .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], + .MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter, pCreateInfo->anisotropyEnable), + .MinModeFilter = vk_to_gen_tex_filter(pCreateInfo->minFilter, pCreateInfo->anisotropyEnable), + .TextureLODBias = anv_clamp_f(pCreateInfo->mipLodBias, -16, 15.996), + .AnisotropicAlgorithm = EWAApproximation, + .MinLOD = anv_clamp_f(pCreateInfo->minLod, 0, 14), + .MaxLOD = anv_clamp_f(pCreateInfo->maxLod, 0, 14), + .ChromaKeyEnable = 0, + .ChromaKeyIndex = 0, + .ChromaKeyMode = 0, + .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], + .CubeSurfaceControlMode = OVERRIDE, + + .IndirectStatePointer = border_color_offset >> 6, + + .LODClampMagnificationMode = MIPNONE, + .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy), + .RAddressMinFilterRoundingEnable = 0, + .RAddressMagFilterRoundingEnable = 0, + .VAddressMinFilterRoundingEnable = 0, + .VAddressMagFilterRoundingEnable = 0, + .UAddressMinFilterRoundingEnable = 0, + .UAddressMagFilterRoundingEnable = 0, + .TrilinearFilterQuality = 0, + .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates, + .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], + .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], + .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], + }; + + GENX(SAMPLER_STATE_pack)(NULL, sampler->state, &sampler_state); + + *pSampler = anv_sampler_to_handle(sampler); + + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c new file mode 100644 index 0000000..5498d1d --- /dev/null +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -0,0 +1,717 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+
+#include "anv_private.h"
+
+#if (ANV_GEN == 9)
+#  include "genxml/gen9_pack.h"
+#elif (ANV_GEN == 8)
+#  include "genxml/gen8_pack.h"
+#elif (ANV_IS_HASWELL)
+#  include "genxml/gen75_pack.h"
+#elif (ANV_GEN == 7)
+#  include "genxml/gen7_pack.h"
+#endif
+
+void
+genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
+{
+   struct anv_device *device = cmd_buffer->device;
+   struct anv_bo *scratch_bo = NULL;
+
+   cmd_buffer->state.scratch_size =
+      anv_block_pool_size(&device->scratch_block_pool);
+   if (cmd_buffer->state.scratch_size > 0)
+      scratch_bo = &device->scratch_block_pool.bo;
+
+/* XXX: Do we need this on more than just BDW? */
+#if (ANV_GEN >= 8)
+   /* Emit a render target cache flush.
+    *
+    * This isn't documented anywhere in the PRM.  However, it seems to be
+    * necessary prior to changing the surface state base address.  Without
+    * this, we get GPU hangs when using multi-level command buffers which
+    * clear depth, reset state base address, and then go render stuff.
+    */
+   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
+                  .RenderTargetCacheFlushEnable = true);
+#endif
+
+   anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS),
+      .GeneralStateBaseAddress = { scratch_bo, 0 },
+      .GeneralStateMemoryObjectControlState = GENX(MOCS),
+      .GeneralStateBaseAddressModifyEnable = true,
+
+      .SurfaceStateBaseAddress = anv_cmd_buffer_surface_base_address(cmd_buffer),
+      .SurfaceStateMemoryObjectControlState = GENX(MOCS),
+      .SurfaceStateBaseAddressModifyEnable = true,
+
+      .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 },
+      .DynamicStateMemoryObjectControlState = GENX(MOCS),
+      .DynamicStateBaseAddressModifyEnable = true,
+
+      .IndirectObjectBaseAddress = { NULL, 0 },
+      .IndirectObjectMemoryObjectControlState = GENX(MOCS),
+      .IndirectObjectBaseAddressModifyEnable = true,
+
+      .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 },
+      .InstructionMemoryObjectControlState = GENX(MOCS),
+      .InstructionBaseAddressModifyEnable = true,
+
+#  if (ANV_GEN >= 8)
+      /* Broadwell requires that we specify a buffer size for a bunch of
+       * these fields.  However, since we will be growing the BO's live, we
+       * just set them all to the maximum.
+       */
+      .GeneralStateBufferSize = 0xfffff,
+      .GeneralStateBufferSizeModifyEnable = true,
+      .DynamicStateBufferSize = 0xfffff,
+      .DynamicStateBufferSizeModifyEnable = true,
+      .IndirectObjectBufferSize = 0xfffff,
+      .IndirectObjectBufferSizeModifyEnable = true,
+      .InstructionBufferSize = 0xfffff,
+      .InstructionBuffersizeModifyEnable = true,
+#  endif
+   );
+
+   /* After re-setting the surface state base address, we have to do some
+    * cache flushing so that the sampler engine will pick up the new
+    * SURFACE_STATE objects and binding tables.  From the Broadwell PRM,
+    * Shared Function > 3D Sampler > State > State Caching (page 96):
+    *
+    *    Coherency with system memory in the state cache, like the texture
+    *    cache is handled partially by software.
It is expected that the
+    *    command stream or shader will issue Cache Flush operation or
+    *    Cache_Flush sampler message to ensure that the L1 cache remains
+    *    coherent with system memory.
+    *
+    *    [...]
+    *
+    *    Whenever the value of the Dynamic_State_Base_Addr,
+    *    Surface_State_Base_Addr are altered, the L1 state cache must be
+    *    invalidated to ensure the new surface or sampler state is fetched
+    *    from system memory.
+    *
+    * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit
+    * which, according to the PIPE_CONTROL instruction documentation in the
+    * Broadwell PRM:
+    *
+    *    Setting this bit is independent of any other bit in this packet.
+    *    This bit controls the invalidation of the L1 and L2 state caches
+    *    at the top of the pipe i.e. at the parsing time.
+    *
+    * Unfortunately, experimentation seems to indicate that state cache
+    * invalidation through a PIPE_CONTROL does nothing whatsoever in
+    * regards to surface state and binding tables.  Instead, it seems that
+    * invalidating the texture cache is what is actually needed.
+    *
+    * XXX: As far as we have been able to determine through
+    * experimentation, flushing the texture cache appears to be
+    * sufficient.  The theory here is that all of the sampling/rendering
+    * units cache the binding table in the texture cache.  However, we have
+    * yet to be able to actually confirm this.
+    */
+   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
+                  .TextureCacheInvalidationEnable = true);
+}
+
+void genX(CmdPipelineBarrier)(
+    VkCommandBuffer                             commandBuffer,
+    VkPipelineStageFlags                        srcStageMask,
+    VkPipelineStageFlags                        destStageMask,
+    VkBool32                                    byRegion,
+    uint32_t                                    memoryBarrierCount,
+    const VkMemoryBarrier*                      pMemoryBarriers,
+    uint32_t                                    bufferMemoryBarrierCount,
+    const VkBufferMemoryBarrier*                pBufferMemoryBarriers,
+    uint32_t                                    imageMemoryBarrierCount,
+    const VkImageMemoryBarrier*                 pImageMemoryBarriers)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   uint32_t b, *dw;
+
+   /* XXX: Right now, we're really dumb and just flush whatever categories
+    * the app asks for.  One of these days we may make this a bit better
+    * but right now that's all the hardware allows for in most areas.
+    */
+   VkAccessFlags src_flags = 0;
+   VkAccessFlags dst_flags = 0;
+
+   for (uint32_t i = 0; i < memoryBarrierCount; i++) {
+      src_flags |= pMemoryBarriers[i].srcAccessMask;
+      dst_flags |= pMemoryBarriers[i].dstAccessMask;
+   }
+
+   for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) {
+      src_flags |= pBufferMemoryBarriers[i].srcAccessMask;
+      dst_flags |= pBufferMemoryBarriers[i].dstAccessMask;
+   }
+
+   for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
+      src_flags |= pImageMemoryBarriers[i].srcAccessMask;
+      dst_flags |= pImageMemoryBarriers[i].dstAccessMask;
+   }
+
+   /* Mask out the Source access flags we care about */
+   const uint32_t src_mask =
+      VK_ACCESS_SHADER_WRITE_BIT |
+      VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+      VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+      VK_ACCESS_TRANSFER_WRITE_BIT;
+
+   src_flags = src_flags & src_mask;
+
+   /* Mask out the destination access flags we care about */
+   const uint32_t dst_mask =
+      VK_ACCESS_INDIRECT_COMMAND_READ_BIT |
+      VK_ACCESS_INDEX_READ_BIT |
+      VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
+      VK_ACCESS_UNIFORM_READ_BIT |
+      VK_ACCESS_SHADER_READ_BIT |
+      VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+      VK_ACCESS_TRANSFER_READ_BIT;
+
+   dst_flags = dst_flags & dst_mask;
+
+   /* The src flags represent how things were used previously.  This is
+    * what we use for doing flushes.
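+    * For example, a barrier whose srcAccessMask includes
+    * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT results in a render target cache
+    * flush in the PIPE_CONTROL built below.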
+ */ + struct GENX(PIPE_CONTROL) flush_cmd = { + GENX(PIPE_CONTROL_header), + .PostSyncOperation = NoWrite, + }; + + for_each_bit(b, src_flags) { + switch ((VkAccessFlagBits)(1 << b)) { + case VK_ACCESS_SHADER_WRITE_BIT: + flush_cmd.DCFlushEnable = true; + break; + case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: + flush_cmd.RenderTargetCacheFlushEnable = true; + break; + case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: + flush_cmd.DepthCacheFlushEnable = true; + break; + case VK_ACCESS_TRANSFER_WRITE_BIT: + flush_cmd.RenderTargetCacheFlushEnable = true; + flush_cmd.DepthCacheFlushEnable = true; + break; + default: + unreachable("should've masked this out by now"); + } + } + + /* If we end up doing two PIPE_CONTROLs, the first, flusing one also has to + * stall and wait for the flushing to finish, so we don't re-dirty the + * caches with in-flight rendering after the second PIPE_CONTROL + * invalidates. + */ + + if (dst_flags) + flush_cmd.CommandStreamerStallEnable = true; + + if (src_flags && dst_flags) { + dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length)); + GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &flush_cmd); + } + + /* The dst flags represent how things will be used in the future. This + * is what we use for doing cache invalidations. + */ + struct GENX(PIPE_CONTROL) invalidate_cmd = { + GENX(PIPE_CONTROL_header), + .PostSyncOperation = NoWrite, + }; + + for_each_bit(b, dst_flags) { + switch ((VkAccessFlagBits)(1 << b)) { + case VK_ACCESS_INDIRECT_COMMAND_READ_BIT: + case VK_ACCESS_INDEX_READ_BIT: + case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT: + invalidate_cmd.VFCacheInvalidationEnable = true; + break; + case VK_ACCESS_UNIFORM_READ_BIT: + invalidate_cmd.ConstantCacheInvalidationEnable = true; + /* fallthrough */ + case VK_ACCESS_SHADER_READ_BIT: + invalidate_cmd.TextureCacheInvalidationEnable = true; + break; + case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT: + invalidate_cmd.TextureCacheInvalidationEnable = true; + break; + case VK_ACCESS_TRANSFER_READ_BIT: + invalidate_cmd.TextureCacheInvalidationEnable = true; + break; + default: + unreachable("should've masked this out by now"); + } + } + + if (dst_flags) { + dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length)); + GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &invalidate_cmd); + } +} + +static void +emit_base_vertex_instance_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, uint32_t offset) +{ + uint32_t *p = anv_batch_emitn(&cmd_buffer->batch, 5, + GENX(3DSTATE_VERTEX_BUFFERS)); + + GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, p + 1, + &(struct GENX(VERTEX_BUFFER_STATE)) { + .VertexBufferIndex = 32, /* Reserved for this */ + .AddressModifyEnable = true, + .BufferPitch = 0, +#if (ANV_GEN >= 8) + .MemoryObjectControlState = GENX(MOCS), + .BufferStartingAddress = { bo, offset }, + .BufferSize = 8 +#else + .VertexBufferMemoryObjectControlState = GENX(MOCS), + .BufferStartingAddress = { bo, offset }, + .EndAddress = { bo, offset + 8 }, +#endif + }); +} + +static void +emit_base_vertex_instance(struct anv_cmd_buffer *cmd_buffer, + uint32_t base_vertex, uint32_t base_instance) +{ + struct anv_state id_state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4); + + ((uint32_t *)id_state.map)[0] = base_vertex; + ((uint32_t *)id_state.map)[1] = base_instance; + + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(id_state); + + emit_base_vertex_instance_bo(cmd_buffer, + &cmd_buffer->device->dynamic_state_block_pool.bo, id_state.offset); +} + +void genX(CmdDraw)( + 
VkCommandBuffer commandBuffer, + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + + genX(cmd_buffer_flush_state)(cmd_buffer); + + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .VertexAccessType = SEQUENTIAL, + .PrimitiveTopologyType = pipeline->topology, + .VertexCountPerInstance = vertexCount, + .StartVertexLocation = firstVertex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = 0); +} + +void genX(CmdDrawIndexed)( + VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + + genX(cmd_buffer_flush_state)(cmd_buffer); + + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .VertexAccessType = RANDOM, + .PrimitiveTopologyType = pipeline->topology, + .VertexCountPerInstance = indexCount, + .StartVertexLocation = firstIndex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = vertexOffset); +} + +/* Auto-Draw / Indirect Registers */ +#define GEN7_3DPRIM_END_OFFSET 0x2420 +#define GEN7_3DPRIM_START_VERTEX 0x2430 +#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 +#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 +#define GEN7_3DPRIM_START_INSTANCE 0x243C +#define GEN7_3DPRIM_BASE_VERTEX 0x2440 + +static void +emit_lrm(struct anv_batch *batch, + uint32_t reg, struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); +} + +static void +emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), + .RegisterOffset = reg, + .DataDWord = imm); +} + +void genX(CmdDrawIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + genX(cmd_buffer_flush_state)(cmd_buffer); + + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 8); + + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); + emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .IndirectParameterEnable = true, + .VertexAccessType = SEQUENTIAL, + 
.PrimitiveTopologyType = pipeline->topology); +} + +void genX(CmdDrawIndexedIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + genX(cmd_buffer_flush_state)(cmd_buffer); + + /* TODO: We need to stomp base vertex to 0 somehow */ + if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || + cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 12); + + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); + emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), + .IndirectParameterEnable = true, + .VertexAccessType = RANDOM, + .PrimitiveTopologyType = pipeline->topology); +} + + +void genX(CmdDispatch)( + VkCommandBuffer commandBuffer, + uint32_t x, + uint32_t y, + uint32_t z) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + + if (prog_data->uses_num_work_groups) { + struct anv_state state = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4); + uint32_t *sizes = state.map; + sizes[0] = x; + sizes[1] = y; + sizes[2] = z; + if (!cmd_buffer->device->info.has_llc) + anv_state_clflush(state); + cmd_buffer->state.num_workgroups_offset = state.offset; + cmd_buffer->state.num_workgroups_bo = + &cmd_buffer->device->dynamic_state_block_pool.bo; + } + + genX(cmd_buffer_flush_compute_state)(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, + .ThreadGroupIDXDimension = x, + .ThreadGroupIDYDimension = y, + .ThreadGroupIDZDimension = z, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); +} + +#define GPGPU_DISPATCHDIMX 0x2500 +#define GPGPU_DISPATCHDIMY 0x2504 +#define GPGPU_DISPATCHDIMZ 0x2508 + +void genX(CmdDispatchIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + if (prog_data->uses_num_work_groups) { + cmd_buffer->state.num_workgroups_offset = bo_offset; + cmd_buffer->state.num_workgroups_bo = bo; + } + + genX(cmd_buffer_flush_compute_state)(cmd_buffer); + + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); + emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); + + 
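The register loads just above read the dispatch dimensions straight out of the application's indirect buffer, and the indirect draw paths earlier do the same for the draw parameters. The offsets (0, 4, 8, ...) are exactly the field offsets of the core Vulkan indirect command structs. A standalone check, not part of this patch and assuming only that the Vulkan headers are available, makes that correspondence explicit:

/* Sketch only: verify the struct layouts that the emit_lrm() offsets rely on. */
#include <assert.h>
#include <stddef.h>
#include <vulkan/vulkan.h>

int main(void)
{
   /* vkCmdDrawIndirect: offsets 0, 4, 8, 12 as loaded above. */
   assert(offsetof(VkDrawIndirectCommand, vertexCount)   == 0);
   assert(offsetof(VkDrawIndirectCommand, instanceCount) == 4);
   assert(offsetof(VkDrawIndirectCommand, firstVertex)   == 8);
   assert(offsetof(VkDrawIndirectCommand, firstInstance) == 12);

   /* vkCmdDrawIndexedIndirect: vertexOffset (base vertex) sits at 12 and
    * firstInstance at 16, matching the BASE_VERTEX/START_INSTANCE loads. */
   assert(offsetof(VkDrawIndexedIndirectCommand, indexCount)    == 0);
   assert(offsetof(VkDrawIndexedIndirectCommand, instanceCount) == 4);
   assert(offsetof(VkDrawIndexedIndirectCommand, firstIndex)    == 8);
   assert(offsetof(VkDrawIndexedIndirectCommand, vertexOffset)  == 12);
   assert(offsetof(VkDrawIndexedIndirectCommand, firstInstance) == 16);

   /* vkCmdDispatchIndirect: x, y, z at 0, 4, 8 -> GPGPU_DISPATCHDIM{X,Y,Z}. */
   assert(offsetof(VkDispatchIndirectCommand, x) == 0);
   assert(offsetof(VkDispatchIndirectCommand, y) == 4);
   assert(offsetof(VkDispatchIndirectCommand, z) == 8);

   return 0;
}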
anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), + .IndirectParameterEnable = true, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); +} + +void +genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer) +{ + if (cmd_buffer->state.current_pipeline != _3D) { + anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), +#if ANV_GEN >= 9 + .MaskBits = 3, +#endif + .PipelineSelection = _3D); + cmd_buffer->state.current_pipeline = _3D; + } +} + +static void +cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + const struct anv_image_view *iview = + anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); + const struct anv_image *image = iview ? iview->image : NULL; + const struct anv_format *anv_format = + iview ? anv_format_for_vk_format(iview->vk_format) : NULL; + const bool has_depth = iview && anv_format->has_depth; + const bool has_stencil = iview && anv_format->has_stencil; + + /* FIXME: Implement the PMA stall W/A */ + /* FIXME: Width and Height are wrong */ + + /* Emit 3DSTATE_DEPTH_BUFFER */ + if (has_depth) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), + .SurfaceType = SURFTYPE_2D, + .DepthWriteEnable = true, + .StencilWriteEnable = has_stencil, + .HierarchicalDepthBufferEnable = false, + .SurfaceFormat = isl_surf_get_depth_format(&device->isl_dev, + &image->depth_surface.isl), + .SurfacePitch = image->depth_surface.isl.row_pitch - 1, + .SurfaceBaseAddress = { + .bo = image->bo, + .offset = image->depth_surface.offset, + }, + .Height = fb->height - 1, + .Width = fb->width - 1, + .LOD = 0, + .Depth = 1 - 1, + .MinimumArrayElement = 0, + .DepthBufferObjectControlState = GENX(MOCS), +#if ANV_GEN >= 8 + .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->depth_surface.isl) >> 2, +#endif + .RenderTargetViewExtent = 1 - 1); + } else { + /* Even when no depth buffer is present, the hardware requires that + * 3DSTATE_DEPTH_BUFFER be programmed correctly. The Broadwell PRM says: + * + * If a null depth buffer is bound, the driver must instead bind depth as: + * 3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D + * 3DSTATE_DEPTH.Width = 1 + * 3DSTATE_DEPTH.Height = 1 + * 3DSTATE_DEPTH.SuraceFormat = D16_UNORM + * 3DSTATE_DEPTH.SurfaceBaseAddress = 0 + * 3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0 + * 3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0 + * 3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0 + * + * The PRM is wrong, though. The width and height must be programmed to + * actual framebuffer's width and height, even when neither depth buffer + * nor stencil buffer is present. + */ + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), + .SurfaceType = SURFTYPE_2D, + .SurfaceFormat = D16_UNORM, + .Width = fb->width - 1, + .Height = fb->height - 1, + .StencilWriteEnable = has_stencil); + } + + /* Emit 3DSTATE_STENCIL_BUFFER */ + if (has_stencil) { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER), +#if ANV_GEN >= 8 || ANV_IS_HASWELL + .StencilBufferEnable = true, +#endif + .StencilBufferObjectControlState = GENX(MOCS), + + /* Stencil buffers have strange pitch. 
The PRM says: + * + * The pitch must be set to 2x the value computed based on width, + * as the stencil buffer is stored with two rows interleaved. + */ + .SurfacePitch = 2 * image->stencil_surface.isl.row_pitch - 1, + +#if ANV_GEN >= 8 + .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->stencil_surface.isl) >> 2, +#endif + .SurfaceBaseAddress = { + .bo = image->bo, + .offset = image->offset + image->stencil_surface.offset, + }); + } else { + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER)); + } + + /* Disable hierarchial depth buffers. */ + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HIER_DEPTH_BUFFER)); + + /* Clear the clear params. */ + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS)); +} + +/** + * @see anv_cmd_buffer_set_subpass() + */ +void +genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass) +{ + cmd_buffer->state.subpass = subpass; + + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; + + cmd_buffer_emit_depth_stencil(cmd_buffer); +} + +void genX(CmdBeginRenderPass)( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); + ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); + + cmd_buffer->state.framebuffer = framebuffer; + cmd_buffer->state.pass = pass; + anv_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin); + + genX(flush_pipeline_select_3d)(cmd_buffer); + + const VkRect2D *render_area = &pRenderPassBegin->renderArea; + + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DRAWING_RECTANGLE), + .ClippedDrawingRectangleYMin = render_area->offset.y, + .ClippedDrawingRectangleXMin = render_area->offset.x, + .ClippedDrawingRectangleYMax = + render_area->offset.y + render_area->extent.height - 1, + .ClippedDrawingRectangleXMax = + render_area->offset.x + render_area->extent.width - 1, + .DrawingRectangleOriginY = 0, + .DrawingRectangleOriginX = 0); + + genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses); + anv_cmd_buffer_clear_subpass(cmd_buffer); +} + +void genX(CmdNextSubpass)( + VkCommandBuffer commandBuffer, + VkSubpassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + + anv_cmd_buffer_resolve_subpass(cmd_buffer); + genX(cmd_buffer_set_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1); + anv_cmd_buffer_clear_subpass(cmd_buffer); +} + +void genX(CmdEndRenderPass)( + VkCommandBuffer commandBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + anv_cmd_buffer_resolve_subpass(cmd_buffer); +} diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c new file mode 100644 index 0000000..4c2e0bc --- /dev/null +++ b/src/intel/vulkan/genX_pipeline.c @@ -0,0 +1,126 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission 
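The render-area programming in genX(CmdBeginRenderPass) above converts the Vulkan render area, whose extent is exclusive, into the inclusive maxima that 3DSTATE_DRAWING_RECTANGLE expects, hence the "offset + extent - 1" arithmetic. A standalone sketch of just that conversion, not part of this patch (the struct and helper names are invented; the Vulkan headers are assumed):

/* Sketch only: exclusive VkRect2D -> inclusive drawing-rectangle bounds. */
#include <assert.h>
#include <stdint.h>
#include <vulkan/vulkan.h>

struct drawing_rect {
   int32_t xmin, ymin, xmax, ymax;   /* all inclusive */
};

static struct drawing_rect
to_drawing_rect(const VkRect2D *area)
{
   return (struct drawing_rect) {
      .xmin = area->offset.x,
      .ymin = area->offset.y,
      .xmax = area->offset.x + (int32_t)area->extent.width - 1,
      .ymax = area->offset.y + (int32_t)area->extent.height - 1,
   };
}

int main(void)
{
   VkRect2D area = { .offset = { 10, 20 }, .extent = { 100, 50 } };
   struct drawing_rect r = to_drawing_rect(&area);

   assert(r.xmin == 10 && r.ymin == 20);
   assert(r.xmax == 109 && r.ymax == 69);
   return 0;
}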
notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" + +#if (ANV_GEN == 9) +# include "genxml/gen9_pack.h" +#elif (ANV_GEN == 8) +# include "genxml/gen8_pack.h" +#elif (ANV_IS_HASWELL) +# include "genxml/gen75_pack.h" +#elif (ANV_GEN == 7) +# include "genxml/gen7_pack.h" +#endif + +VkResult +genX(compute_pipeline_create)( + VkDevice _device, + struct anv_pipeline_cache * cache, + const VkComputePipelineCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline *pipeline; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO); + + pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pipeline->device = device; + pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); + + pipeline->blend_state.map = NULL; + + result = anv_reloc_list_init(&pipeline->batch_relocs, + pAllocator ? pAllocator : &device->alloc); + if (result != VK_SUCCESS) { + anv_free2(&device->alloc, pAllocator, pipeline); + return result; + } + pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + pipeline->batch.relocs = &pipeline->batch_relocs; + + /* When we free the pipeline, we detect stages based on the NULL status + * of various prog_data pointers. Make them NULL by default. + */ + memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); + memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); + + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = NO_KERNEL; + pipeline->gs_kernel = NO_KERNEL; + + pipeline->active_stages = 0; + pipeline->total_scratch = 0; + + assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT); + ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module); + anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module, + pCreateInfo->stage.pName, + pCreateInfo->stage.pSpecializationInfo); + + pipeline->use_repclear = false; + + const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + + anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), + .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_COMPUTE], + .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048), +#if ANV_GEN > 7 + .ScratchSpaceBasePointerHigh = 0, + .StackSize = 0, +#else + .GPGPUMode = true, +#endif + .MaximumNumberofThreads = device->info.max_cs_threads - 1, + .NumberofURBEntries = ANV_GEN <= 7 ? 0 : 2, + .ResetGatewayTimer = true, +#if ANV_GEN <= 8 + .BypassGatewayControl = true, +#endif + .URBEntryAllocationSize = ANV_GEN <= 7 ? 
0 : 2, + .CURBEAllocationSize = 0); + + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + uint32_t group_size = prog_data->local_size[0] * + prog_data->local_size[1] * prog_data->local_size[2]; + pipeline->cs_thread_width_max = DIV_ROUND_UP(group_size, prog_data->simd_size); + uint32_t remainder = group_size & (prog_data->simd_size - 1); + + if (remainder > 0) + pipeline->cs_right_mask = ~0u >> (32 - remainder); + else + pipeline->cs_right_mask = ~0u >> (32 - prog_data->simd_size); + + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h new file mode 100644 index 0000000..696e2be --- /dev/null +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -0,0 +1,327 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +static uint32_t +vertex_element_comp_control(enum isl_format format, unsigned comp) +{ + uint8_t bits; + switch (comp) { + case 0: bits = isl_format_layouts[format].channels.r.bits; break; + case 1: bits = isl_format_layouts[format].channels.g.bits; break; + case 2: bits = isl_format_layouts[format].channels.b.bits; break; + case 3: bits = isl_format_layouts[format].channels.a.bits; break; + default: unreachable("Invalid component"); + } + + if (bits) { + return VFCOMP_STORE_SRC; + } else if (comp < 3) { + return VFCOMP_STORE_0; + } else if (isl_format_layouts[format].channels.r.type == ISL_UINT || + isl_format_layouts[format].channels.r.type == ISL_SINT) { + assert(comp == 3); + return VFCOMP_STORE_1_INT; + } else { + assert(comp == 3); + return VFCOMP_STORE_1_FP; + } +} + +static void +emit_vertex_input(struct anv_pipeline *pipeline, + const VkPipelineVertexInputStateCreateInfo *info, + const struct anv_graphics_pipeline_create_info *extra) +{ + uint32_t elements; + if (extra && extra->disable_vs) { + /* If the VS is disabled, just assume the user knows what they're + * doing and apply the layout blindly. This can only come from + * meta, so this *should* be safe. 
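The cs_thread_width_max and cs_right_mask values computed above in genX(compute_pipeline_create), and consumed by the GPGPU_WALKER packets in genX_cmd_buffer.c, are plain integer math: how many SIMD threads cover one workgroup, and which lanes of the last, possibly partial, thread are live. A self-contained sketch of that arithmetic, not part of the patch, assuming simd_size is a power of two no larger than 32:

/* Sketch only: workgroup size -> HW thread count and right execution mask. */
#include <assert.h>
#include <stdint.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

static void
cs_walker_params(uint32_t local_x, uint32_t local_y, uint32_t local_z,
                 uint32_t simd_size, uint32_t *thread_width_max,
                 uint32_t *right_mask)
{
   uint32_t group_size = local_x * local_y * local_z;
   uint32_t remainder = group_size & (simd_size - 1);

   /* Number of SIMD threads needed to cover one workgroup. */
   *thread_width_max = DIV_ROUND_UP(group_size, simd_size);

   /* Execution mask for the right-most (possibly partial) thread. */
   if (remainder > 0)
      *right_mask = ~0u >> (32 - remainder);
   else
      *right_mask = ~0u >> (32 - simd_size);
}

int main(void)
{
   uint32_t width, mask;

   /* An 8x5x1 workgroup compiled SIMD16: 40 invocations need 3 threads and
    * the last thread enables only its low 8 lanes. */
   cs_walker_params(8, 5, 1, 16, &width, &mask);
   assert(width == 3);
   assert(mask == 0x00ff);
   return 0;
}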
+ */ + elements = 0; + for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) + elements |= (1 << info->pVertexAttributeDescriptions[i].location); + } else { + /* Pull inputs_read out of the VS prog data */ + uint64_t inputs_read = pipeline->vs_prog_data.inputs_read; + assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0); + elements = inputs_read >> VERT_ATTRIB_GENERIC0; + } + +#if ANV_GEN >= 8 + /* On BDW+, we only need to allocate space for base ids. Setting up + * the actual vertex and instance id is a separate packet. + */ + const bool needs_svgs_elem = pipeline->vs_prog_data.uses_basevertex || + pipeline->vs_prog_data.uses_baseinstance; +#else + /* On Haswell and prior, vertex and instance id are created by using the + * ComponentControl fields, so we need an element for any of them. + */ + const bool needs_svgs_elem = pipeline->vs_prog_data.uses_vertexid || + pipeline->vs_prog_data.uses_instanceid || + pipeline->vs_prog_data.uses_basevertex || + pipeline->vs_prog_data.uses_baseinstance; +#endif + + uint32_t elem_count = __builtin_popcount(elements) + needs_svgs_elem; + if (elem_count == 0) + return; + + uint32_t *p; + + const uint32_t num_dwords = 1 + elem_count * 2; + p = anv_batch_emitn(&pipeline->batch, num_dwords, + GENX(3DSTATE_VERTEX_ELEMENTS)); + memset(p + 1, 0, (num_dwords - 1) * 4); + + for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) { + const VkVertexInputAttributeDescription *desc = + &info->pVertexAttributeDescriptions[i]; + enum isl_format format = anv_get_isl_format(desc->format, + VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_TILING_LINEAR, + NULL); + + assert(desc->binding < 32); + + if ((elements & (1 << desc->location)) == 0) + continue; /* Binding unused */ + + uint32_t slot = __builtin_popcount(elements & ((1 << desc->location) - 1)); + + struct GENX(VERTEX_ELEMENT_STATE) element = { + .VertexBufferIndex = desc->binding, + .Valid = true, + .SourceElementFormat = format, + .EdgeFlagEnable = false, + .SourceElementOffset = desc->offset, + .Component0Control = vertex_element_comp_control(format, 0), + .Component1Control = vertex_element_comp_control(format, 1), + .Component2Control = vertex_element_comp_control(format, 2), + .Component3Control = vertex_element_comp_control(format, 3), + }; + GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element); + +#if ANV_GEN >= 8 + /* On Broadwell and later, we have a separate VF_INSTANCING packet + * that controls instancing. On Haswell and prior, that's part of + * VERTEX_BUFFER_STATE which we emit later. + */ + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING), + .InstancingEnable = pipeline->instancing_enable[desc->binding], + .VertexElementIndex = slot, + /* Vulkan so far doesn't have an instance divisor, so + * this is always 1 (ignored if not instancing). */ + .InstanceDataStepRate = 1); +#endif + } + + const uint32_t id_slot = __builtin_popcount(elements); + if (needs_svgs_elem) { + /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum: + * "Within a VERTEX_ELEMENT_STATE structure, if a Component + * Control field is set to something other than VFCOMP_STORE_SRC, + * no higher-numbered Component Control fields may be set to + * VFCOMP_STORE_SRC" + * + * This means, that if we have BaseInstance, we need BaseVertex as + * well. Just do all or nothing. + */ + uint32_t base_ctrl = (pipeline->vs_prog_data.uses_basevertex || + pipeline->vs_prog_data.uses_baseinstance) ? 
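The slot assignment in emit_vertex_input() above relies on a small trick worth calling out: the absolute attribute location does not matter by itself, only how many lower-numbered locations are enabled, so the resulting vertex elements stay densely packed regardless of which locations the application uses. A standalone sketch of just that computation, not part of the patch (the helper name is invented; __builtin_popcount is a GCC/Clang builtin):

/* Sketch only: dense slot index from a bitmask of used attribute locations. */
#include <assert.h>
#include <stdint.h>

static uint32_t
vertex_element_slot(uint32_t elements, uint32_t location)
{
   assert(elements & (1u << location));
   /* Count how many enabled locations sit below this one. */
   return __builtin_popcount(elements & ((1u << location) - 1));
}

int main(void)
{
   uint32_t elements = (1u << 0) | (1u << 3) | (1u << 7);

   assert(vertex_element_slot(elements, 0) == 0);
   assert(vertex_element_slot(elements, 3) == 1);
   assert(vertex_element_slot(elements, 7) == 2);
   return 0;
}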
+ VFCOMP_STORE_SRC : VFCOMP_STORE_0; + + struct GENX(VERTEX_ELEMENT_STATE) element = { + .VertexBufferIndex = 32, /* Reserved for this */ + .Valid = true, + .SourceElementFormat = ISL_FORMAT_R32G32_UINT, + .Component0Control = base_ctrl, + .Component1Control = base_ctrl, +#if ANV_GEN >= 8 + .Component2Control = VFCOMP_STORE_0, + .Component3Control = VFCOMP_STORE_0, +#else + .Component2Control = VFCOMP_STORE_VID, + .Component3Control = VFCOMP_STORE_IID, +#endif + }; + GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element); + } + +#if ANV_GEN >= 8 + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS), + .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, + .VertexIDComponentNumber = 2, + .VertexIDElementOffset = id_slot, + .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, + .InstanceIDComponentNumber = 3, + .InstanceIDElementOffset = id_slot); +#endif +} + +static inline void +emit_urb_setup(struct anv_pipeline *pipeline) +{ +#if ANV_GEN == 7 + struct anv_device *device = pipeline->device; + + /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1: + * + * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall + * needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS, + * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS, + * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL + * needs to be sent before any combination of VS associated 3DSTATE." + */ + anv_batch_emit(&pipeline->batch, GEN7_PIPE_CONTROL, + .DepthStallEnable = true, + .PostSyncOperation = WriteImmediateData, + .Address = { &device->workaround_bo, 0 }); +#endif + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), + .ConstantBufferOffset = 0, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_GS), + .ConstantBufferOffset = 4, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_PS), + .ConstantBufferOffset = 8, + .ConstantBufferSize = 4); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_VS), + .VSURBStartingAddress = pipeline->urb.vs_start, + .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, + .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_GS), + .GSURBStartingAddress = pipeline->urb.gs_start, + .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, + .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_HS), + .HSURBStartingAddress = pipeline->urb.vs_start, + .HSURBEntryAllocationSize = 0, + .HSNumberofURBEntries = 0); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_DS), + .DSURBStartingAddress = pipeline->urb.vs_start, + .DSURBEntryAllocationSize = 0, + .DSNumberofURBEntries = 0); +} + +static inline uint32_t +scratch_space(const struct brw_stage_prog_data *prog_data) +{ + return ffs(prog_data->total_scratch / 2048); +} + +static const uint32_t vk_to_gen_cullmode[] = { + [VK_CULL_MODE_NONE] = CULLMODE_NONE, + [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT, + [VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK, + [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH +}; + +static const uint32_t vk_to_gen_fillmode[] = { + [VK_POLYGON_MODE_FILL] = FILL_MODE_SOLID, + [VK_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME, + [VK_POLYGON_MODE_POINT] = FILL_MODE_POINT, +}; + +static const uint32_t vk_to_gen_front_face[] = { + [VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1, + [VK_FRONT_FACE_CLOCKWISE] = 0 +}; + +static const uint32_t vk_to_gen_logic_op[] = { + 
[VK_LOGIC_OP_COPY] = LOGICOP_COPY, + [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR, + [VK_LOGIC_OP_AND] = LOGICOP_AND, + [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, + [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, + [VK_LOGIC_OP_NO_OP] = LOGICOP_NOOP, + [VK_LOGIC_OP_XOR] = LOGICOP_XOR, + [VK_LOGIC_OP_OR] = LOGICOP_OR, + [VK_LOGIC_OP_NOR] = LOGICOP_NOR, + [VK_LOGIC_OP_EQUIVALENT] = LOGICOP_EQUIV, + [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, + [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, + [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, + [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED, + [VK_LOGIC_OP_NAND] = LOGICOP_NAND, + [VK_LOGIC_OP_SET] = LOGICOP_SET, +}; + +static const uint32_t vk_to_gen_blend[] = { + [VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO, + [VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE, + [VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, + [VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR, + [VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, + [VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, + [VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= BLENDFACTOR_INV_CONST_COLOR, + [VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= BLENDFACTOR_INV_CONST_ALPHA, + [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, + [VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, + [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, + [VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, + [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, +}; + +static const uint32_t vk_to_gen_blend_op[] = { + [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD, + [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT, + [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT, + [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN, + [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, +}; + +static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, +}; + +static const uint32_t vk_to_gen_stencil_op[] = { + [VK_STENCIL_OP_KEEP] = STENCILOP_KEEP, + [VK_STENCIL_OP_ZERO] = STENCILOP_ZERO, + [VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE, + [VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT, + [VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT, + [VK_STENCIL_OP_INVERT] = STENCILOP_INVERT, + [VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR, + [VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR, +}; diff --git a/src/intel/vulkan/genX_state_util.h b/src/intel/vulkan/genX_state_util.h new file mode 100644 index 0000000..67f798a --- /dev/null +++ b/src/intel/vulkan/genX_state_util.h @@ -0,0 +1,112 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, 
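The translation arrays in this header all follow the same pattern: a designated-initializer array indexed directly by the Vulkan enum, which works because these core enums are small, dense, and zero-based. A hedged standalone sketch of the pattern with a bounds check follows; the hardware-side values here are placeholders, not real gen enum values, and the names are invented:

/* Sketch only: table-driven VK -> HW translation with a range check. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <vulkan/vulkan.h>

static const uint32_t fake_hw_blend_op[] = {   /* placeholder values */
   [VK_BLEND_OP_ADD]              = 0,
   [VK_BLEND_OP_SUBTRACT]         = 1,
   [VK_BLEND_OP_REVERSE_SUBTRACT] = 2,
   [VK_BLEND_OP_MIN]              = 3,
   [VK_BLEND_OP_MAX]              = 4,
};

static uint32_t
translate_blend_op(VkBlendOp op)
{
   assert((size_t)op < sizeof(fake_hw_blend_op) / sizeof(fake_hw_blend_op[0]));
   return fake_hw_blend_op[op];
}

int main(void)
{
   return translate_blend_op(VK_BLEND_OP_MAX) == 4 ? 0 : 1;
}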
including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +static const uint8_t +anv_surftype(const struct anv_image *image, VkImageViewType view_type, + bool storage) +{ + switch (view_type) { + default: + unreachable("bad VkImageViewType"); + case VK_IMAGE_VIEW_TYPE_1D: + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + assert(image->type == VK_IMAGE_TYPE_1D); + return SURFTYPE_1D; + case VK_IMAGE_VIEW_TYPE_CUBE: + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + assert(image->type == VK_IMAGE_TYPE_2D); + return storage ? SURFTYPE_2D : SURFTYPE_CUBE; + case VK_IMAGE_VIEW_TYPE_2D: + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + assert(image->type == VK_IMAGE_TYPE_2D); + return SURFTYPE_2D; + case VK_IMAGE_VIEW_TYPE_3D: + assert(image->type == VK_IMAGE_TYPE_3D); + return SURFTYPE_3D; + } +} + +static enum isl_format +anv_surface_format(const struct anv_device *device, enum isl_format format, + bool storage) +{ + if (storage) { + return isl_lower_storage_image_format(&device->isl_dev, format); + } else { + return format; + } +} + +#if ANV_GEN > 7 || ANV_IS_HASWELL +static const uint32_t vk_to_gen_swizzle[] = { + [VK_COMPONENT_SWIZZLE_ZERO] = SCS_ZERO, + [VK_COMPONENT_SWIZZLE_ONE] = SCS_ONE, + [VK_COMPONENT_SWIZZLE_R] = SCS_RED, + [VK_COMPONENT_SWIZZLE_G] = SCS_GREEN, + [VK_COMPONENT_SWIZZLE_B] = SCS_BLUE, + [VK_COMPONENT_SWIZZLE_A] = SCS_ALPHA +}; +#endif + +static inline uint32_t +vk_to_gen_tex_filter(VkFilter filter, bool anisotropyEnable) +{ + switch (filter) { + default: + assert(!"Invalid filter"); + case VK_FILTER_NEAREST: + return MAPFILTER_NEAREST; + case VK_FILTER_LINEAR: + return anisotropyEnable ? 
MAPFILTER_ANISOTROPIC : MAPFILTER_LINEAR; + } +} + +static inline uint32_t +vk_to_gen_max_anisotropy(float ratio) +{ + return (anv_clamp_f(ratio, 2, 16) - 2) / 2; +} + +static const uint32_t vk_to_gen_mipmap_mode[] = { + [VK_SAMPLER_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, + [VK_SAMPLER_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR +}; + +static const uint32_t vk_to_gen_tex_address[] = { + [VK_SAMPLER_ADDRESS_MODE_REPEAT] = TCM_WRAP, + [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = TCM_MIRROR, + [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = TCM_CLAMP, + [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, + [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, +}; + +static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, +}; diff --git a/src/intel/vulkan/intel_icd.json.in b/src/intel/vulkan/intel_icd.json.in new file mode 100644 index 0000000..d9b363a --- /dev/null +++ b/src/intel/vulkan/intel_icd.json.in @@ -0,0 +1,7 @@ +{ + "file_format_version": "1.0.0", + "ICD": { + "library_path": "@install_libdir@/libvulkan_intel.so", + "abi_versions": "1.0.3" + } +} diff --git a/src/intel/vulkan/tests/.gitignore b/src/intel/vulkan/tests/.gitignore new file mode 100644 index 0000000..5d05405 --- /dev/null +++ b/src/intel/vulkan/tests/.gitignore @@ -0,0 +1,5 @@ +block_pool +block_pool_no_free +state_pool +state_pool_free_list_only +state_pool_no_free diff --git a/src/intel/vulkan/tests/Makefile.am b/src/intel/vulkan/tests/Makefile.am new file mode 100644 index 0000000..883013d --- /dev/null +++ b/src/intel/vulkan/tests/Makefile.am @@ -0,0 +1,46 @@ +# Copyright © 2009 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# on the rights to use, copy, modify, merge, publish, distribute, sub +# license, and/or sell copies of the Software, and to permit persons to whom +# the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +# ADAM JACKSON BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
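The vk_to_gen_max_anisotropy() helper above encodes the Vulkan float ratio into the sampler's maximum-anisotropy field; judging by the encoding, the supported ratios advance in steps of two from 2:1 up to 16:1, giving field values 0 through 7. A standalone sketch of that mapping, not part of the patch (the clamp helper is written out here because the driver's own clamp is not shown):

/* Sketch only: Vulkan maxAnisotropy ratio -> 0..7 sampler field value. */
#include <assert.h>
#include <stdint.h>

static float
clamp_f(float x, float min, float max)
{
   return x < min ? min : (x > max ? max : x);
}

static uint32_t
max_anisotropy_field(float ratio)
{
   return (uint32_t)((clamp_f(ratio, 2.0f, 16.0f) - 2.0f) / 2.0f);
}

int main(void)
{
   assert(max_anisotropy_field(1.0f)  == 0);   /* clamped up to 2:1 */
   assert(max_anisotropy_field(4.0f)  == 1);
   assert(max_anisotropy_field(16.0f) == 7);
   return 0;
}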
+ +AM_CPPFLAGS = \ + $(INTEL_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $(DEFINES) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa \ + -I$(top_srcdir)/src/mesa/drivers/dri/common \ + -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/isl/ \ + -I$(top_srcdir)/src/vulkan + +LDADD = \ + $(top_builddir)/src/vulkan/libvulkan-test.la \ + $(PTHREAD_LIBS) -lm -lstdc++ + +check_PROGRAMS = \ + block_pool_no_free \ + state_pool_no_free \ + state_pool_free_list_only \ + state_pool + +TESTS = $(check_PROGRAMS) diff --git a/src/intel/vulkan/tests/block_pool_no_free.c b/src/intel/vulkan/tests/block_pool_no_free.c new file mode 100644 index 0000000..86d1a76 --- /dev/null +++ b/src/intel/vulkan/tests/block_pool_no_free.c @@ -0,0 +1,144 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include + +#include "anv_private.h" + +#define NUM_THREADS 16 +#define BLOCKS_PER_THREAD 1024 +#define NUM_RUNS 64 + +struct job { + pthread_t thread; + unsigned id; + struct anv_block_pool *pool; + uint32_t blocks[BLOCKS_PER_THREAD]; + uint32_t back_blocks[BLOCKS_PER_THREAD]; +} jobs[NUM_THREADS]; + + +static void *alloc_blocks(void *_job) +{ + struct job *job = _job; + int32_t block, *data; + + for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) { + block = anv_block_pool_alloc(job->pool); + data = job->pool->map + block; + *data = block; + assert(block >= 0); + job->blocks[i] = block; + + block = anv_block_pool_alloc_back(job->pool); + data = job->pool->map + block; + *data = block; + assert(block < 0); + job->back_blocks[i] = -block; + } + + for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) { + block = job->blocks[i]; + data = job->pool->map + block; + assert(*data == block); + + block = -job->back_blocks[i]; + data = job->pool->map + block; + assert(*data == block); + } + + return NULL; +} + +static void validate_monotonic(uint32_t **blocks) +{ + /* A list of indices, one per thread */ + unsigned next[NUM_THREADS]; + memset(next, 0, sizeof(next)); + + int highest = -1; + while (true) { + /* First, we find which thread has the highest next element */ + int thread_max = -1; + int max_thread_idx = -1; + for (unsigned i = 0; i < NUM_THREADS; i++) { + if (next[i] >= BLOCKS_PER_THREAD) + continue; + + if (thread_max < blocks[i][next[i]]) { + thread_max = blocks[i][next[i]]; + max_thread_idx = i; + } + } + + /* The only way this can happen is if all of the next[] values are at + * BLOCKS_PER_THREAD, in which case, we're done. + */ + if (thread_max == -1) + break; + + /* That next element had better be higher than the previous highest */ + assert(blocks[max_thread_idx][next[max_thread_idx]] > highest); + + highest = blocks[max_thread_idx][next[max_thread_idx]]; + next[max_thread_idx]++; + } +} + +static void run_test() +{ + struct anv_device device; + struct anv_block_pool pool; + + pthread_mutex_init(&device.mutex, NULL); + anv_block_pool_init(&pool, &device, 16); + + for (unsigned i = 0; i < NUM_THREADS; i++) { + jobs[i].pool = &pool; + jobs[i].id = i; + pthread_create(&jobs[i].thread, NULL, alloc_blocks, &jobs[i]); + } + + for (unsigned i = 0; i < NUM_THREADS; i++) + pthread_join(jobs[i].thread, NULL); + + /* Validate that the block allocations were monotonic */ + uint32_t *block_ptrs[NUM_THREADS]; + for (unsigned i = 0; i < NUM_THREADS; i++) + block_ptrs[i] = jobs[i].blocks; + validate_monotonic(block_ptrs); + + /* Validate that the back block allocations were monotonic */ + for (unsigned i = 0; i < NUM_THREADS; i++) + block_ptrs[i] = jobs[i].back_blocks; + validate_monotonic(block_ptrs); + + anv_block_pool_finish(&pool); + pthread_mutex_destroy(&device.mutex); +} + +int main(int argc, char **argv) +{ + for (unsigned i = 0; i < NUM_RUNS; i++) + run_test(); +} diff --git a/src/intel/vulkan/tests/state_pool.c b/src/intel/vulkan/tests/state_pool.c new file mode 100644 index 0000000..878ec19 --- /dev/null +++ b/src/intel/vulkan/tests/state_pool.c @@ -0,0 +1,57 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + 
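The validate_monotonic() walk above examines the per-thread allocation logs to check that the pool handed out blocks in a globally monotonic way. A simplified standalone variant of the same idea, not part of this patch, checks the two properties that matter most here: each thread's offsets grow strictly, and no offset is ever handed out twice. The names and the tiny example data are invented.

/* Sketch only: per-thread monotonicity plus global uniqueness. */
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

static int
cmp_u32(const void *a, const void *b)
{
   uint32_t x = *(const uint32_t *)a, y = *(const uint32_t *)b;
   return (x > y) - (x < y);
}

static void
validate_simple(uint32_t **blocks, unsigned num_threads, unsigned per_thread)
{
   uint32_t *all = malloc(sizeof(uint32_t) * num_threads * per_thread);
   unsigned n = 0;

   assert(all != NULL);

   for (unsigned t = 0; t < num_threads; t++) {
      for (unsigned i = 0; i < per_thread; i++) {
         if (i > 0)
            assert(blocks[t][i] > blocks[t][i - 1]);   /* per-thread monotonic */
         all[n++] = blocks[t][i];
      }
   }

   qsort(all, n, sizeof(uint32_t), cmp_u32);
   for (unsigned i = 1; i < n; i++)
      assert(all[i] != all[i - 1]);                    /* no duplicates */

   free(all);
}

int main(void)
{
   uint32_t t0[] = { 0, 2, 5 }, t1[] = { 1, 3, 4 };
   uint32_t *threads[] = { t0, t1 };

   validate_simple(threads, 2, 3);
   return 0;
}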
* Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include + +#include "anv_private.h" + +#define NUM_THREADS 8 +#define STATES_PER_THREAD_LOG2 10 +#define STATES_PER_THREAD (1 << STATES_PER_THREAD_LOG2) +#define NUM_RUNS 64 + +#include "state_pool_test_helper.h" + +int main(int argc, char **argv) +{ + struct anv_device device; + struct anv_block_pool block_pool; + struct anv_state_pool state_pool; + + pthread_mutex_init(&device.mutex, NULL); + + for (unsigned i = 0; i < NUM_RUNS; i++) { + anv_block_pool_init(&block_pool, &device, 256); + anv_state_pool_init(&state_pool, &block_pool); + + /* Grab one so a zero offset is impossible */ + anv_state_pool_alloc(&state_pool, 16, 16); + + run_state_pool_test(&state_pool); + + anv_state_pool_finish(&state_pool); + anv_block_pool_finish(&block_pool); + } + + pthread_mutex_destroy(&device.mutex); +} diff --git a/src/intel/vulkan/tests/state_pool_free_list_only.c b/src/intel/vulkan/tests/state_pool_free_list_only.c new file mode 100644 index 0000000..2f4eb47 --- /dev/null +++ b/src/intel/vulkan/tests/state_pool_free_list_only.c @@ -0,0 +1,66 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include + +#include "anv_private.h" + +#define NUM_THREADS 8 +#define STATES_PER_THREAD_LOG2 12 +#define STATES_PER_THREAD (1 << STATES_PER_THREAD_LOG2) + +#include "state_pool_test_helper.h" + +int main(int argc, char **argv) +{ + struct anv_device device; + struct anv_block_pool block_pool; + struct anv_state_pool state_pool; + + pthread_mutex_init(&device.mutex, NULL); + anv_block_pool_init(&block_pool, &device, 4096); + anv_state_pool_init(&state_pool, &block_pool); + + /* Grab one so a zero offset is impossible */ + anv_state_pool_alloc(&state_pool, 16, 16); + + /* Grab and return enough states that the state pool test below won't + * actually ever resize anything. + */ + { + struct anv_state states[NUM_THREADS * STATES_PER_THREAD]; + for (unsigned i = 0; i < NUM_THREADS * STATES_PER_THREAD; i++) { + states[i] = anv_state_pool_alloc(&state_pool, 16, 16); + assert(states[i].offset != 0); + } + + for (unsigned i = 0; i < NUM_THREADS * STATES_PER_THREAD; i++) + anv_state_pool_free(&state_pool, states[i]); + } + + run_state_pool_test(&state_pool); + + anv_state_pool_finish(&state_pool); + anv_block_pool_finish(&block_pool); + pthread_mutex_destroy(&device.mutex); +} diff --git a/src/intel/vulkan/tests/state_pool_no_free.c b/src/intel/vulkan/tests/state_pool_no_free.c new file mode 100644 index 0000000..4b248c2 --- /dev/null +++ b/src/intel/vulkan/tests/state_pool_no_free.c @@ -0,0 +1,117 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include + +#include "anv_private.h" + +#define NUM_THREADS 16 +#define STATES_PER_THREAD 1024 +#define NUM_RUNS 64 + +struct job { + pthread_t thread; + unsigned id; + struct anv_state_pool *pool; + uint32_t offsets[STATES_PER_THREAD]; +} jobs[NUM_THREADS]; + +pthread_barrier_t barrier; + +static void *alloc_states(void *_job) +{ + struct job *job = _job; + + pthread_barrier_wait(&barrier); + + for (unsigned i = 0; i < STATES_PER_THREAD; i++) { + struct anv_state state = anv_state_pool_alloc(job->pool, 16, 16); + job->offsets[i] = state.offset; + } + + return NULL; +} + +static void run_test() +{ + struct anv_device device; + struct anv_block_pool block_pool; + struct anv_state_pool state_pool; + + pthread_mutex_init(&device.mutex, NULL); + anv_block_pool_init(&block_pool, &device, 64); + anv_state_pool_init(&state_pool, &block_pool); + + pthread_barrier_init(&barrier, NULL, NUM_THREADS); + + for (unsigned i = 0; i < NUM_THREADS; i++) { + jobs[i].pool = &state_pool; + jobs[i].id = i; + pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]); + } + + for (unsigned i = 0; i < NUM_THREADS; i++) + pthread_join(jobs[i].thread, NULL); + + /* A list of indices, one per thread */ + unsigned next[NUM_THREADS]; + memset(next, 0, sizeof(next)); + + int highest = -1; + while (true) { + /* First, we find which thread has the highest next element */ + int thread_max = -1; + int max_thread_idx = -1; + for (unsigned i = 0; i < NUM_THREADS; i++) { + if (next[i] >= STATES_PER_THREAD) + continue; + + if (thread_max < jobs[i].offsets[next[i]]) { + thread_max = jobs[i].offsets[next[i]]; + max_thread_idx = i; + } + } + + /* The only way this can happen is if all of the next[] values are at + * BLOCKS_PER_THREAD, in which case, we're done. + */ + if (thread_max == -1) + break; + + /* That next element had better be higher than the previous highest */ + assert(jobs[max_thread_idx].offsets[next[max_thread_idx]] > highest); + + highest = jobs[max_thread_idx].offsets[next[max_thread_idx]]; + next[max_thread_idx]++; + } + + anv_state_pool_finish(&state_pool); + anv_block_pool_finish(&block_pool); + pthread_mutex_destroy(&device.mutex); +} + +int main(int argc, char **argv) +{ + for (unsigned i = 0; i < NUM_RUNS; i++) + run_test(); +} diff --git a/src/intel/vulkan/tests/state_pool_test_helper.h b/src/intel/vulkan/tests/state_pool_test_helper.h new file mode 100644 index 0000000..0e56431 --- /dev/null +++ b/src/intel/vulkan/tests/state_pool_test_helper.h @@ -0,0 +1,71 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include + +struct job { + struct anv_state_pool *pool; + unsigned id; + pthread_t thread; +} jobs[NUM_THREADS]; + +pthread_barrier_t barrier; + +static void *alloc_states(void *void_job) +{ + struct job *job = void_job; + + const unsigned chunk_size = 1 << (job->id % STATES_PER_THREAD_LOG2); + const unsigned num_chunks = STATES_PER_THREAD / chunk_size; + + struct anv_state states[chunk_size]; + + pthread_barrier_wait(&barrier); + + for (unsigned c = 0; c < num_chunks; c++) { + for (unsigned i = 0; i < chunk_size; i++) { + states[i] = anv_state_pool_alloc(job->pool, 16, 16); + memset(states[i].map, 139, 16); + assert(states[i].offset != 0); + } + + for (unsigned i = 0; i < chunk_size; i++) + anv_state_pool_free(job->pool, states[i]); + } + + return NULL; +} + +static void run_state_pool_test(struct anv_state_pool *state_pool) +{ + pthread_barrier_init(&barrier, NULL, NUM_THREADS); + + for (unsigned i = 0; i < NUM_THREADS; i++) { + jobs[i].pool = state_pool; + jobs[i].id = i; + pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]); + } + + for (unsigned i = 0; i < NUM_THREADS; i++) + pthread_join(jobs[i].thread, NULL); +} diff --git a/src/vulkan/.gitignore b/src/vulkan/.gitignore deleted file mode 100644 index 40afc2e..0000000 --- a/src/vulkan/.gitignore +++ /dev/null @@ -1,9 +0,0 @@ -# Generated source files -/*_spirv_autogen.h -/anv_entrypoints.c -/anv_entrypoints.h -/wayland-drm-protocol.c -/wayland-drm-client-protocol.h -/dev_icd.json -/intel_icd.json -/gen*_pack.h \ No newline at end of file diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am deleted file mode 100644 index 0605dc4..0000000 --- a/src/vulkan/Makefile.am +++ /dev/null @@ -1,203 +0,0 @@ -# Copyright © 2015 Intel Corporation -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. - -SUBDIRS = . 
tests - -vulkan_includedir = $(includedir)/vulkan - -vulkan_include_HEADERS = \ - $(top_srcdir)/include/vulkan/vk_platform.h \ - $(top_srcdir)/include/vulkan/vulkan.h \ - $(top_srcdir)/include/vulkan/vulkan_intel.h - -# Used when generating entrypoints to filter out unwanted extensions -VULKAN_ENTRYPOINT_CPPFLAGS = \ - -I$(top_srcdir)/include/vulkan \ - -DVK_USE_PLATFORM_XCB_KHR \ - -DVK_USE_PLATFORM_WAYLAND_KHR - -lib_LTLIBRARIES = libvulkan_intel.la - -check_LTLIBRARIES = libvulkan-test.la - -PER_GEN_LIBS = \ - libanv-gen7.la \ - libanv-gen75.la \ - libanv-gen8.la \ - libanv-gen9.la - -noinst_LTLIBRARIES = $(PER_GEN_LIBS) - -# The gallium includes are for the util/u_math.h include from main/macros.h - -AM_CPPFLAGS = \ - $(INTEL_CFLAGS) \ - $(VALGRIND_CFLAGS) \ - $(DEFINES) \ - -I$(top_srcdir)/include \ - -I$(top_srcdir)/src \ - -I$(top_srcdir)/src/compiler \ - -I$(top_srcdir)/src/mapi \ - -I$(top_srcdir)/src/mesa \ - -I$(top_srcdir)/src/mesa/drivers/dri/common \ - -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ - -I$(top_srcdir)/src/gallium/auxiliary \ - -I$(top_srcdir)/src/gallium/include \ - -I$(top_srcdir)/src/intel/ \ - -I$(top_builddir)/src \ - -I$(top_builddir)/src/compiler \ - -I$(top_builddir)/src/compiler/nir \ - -I$(top_builddir)/src/vulkan - -libvulkan_intel_la_CFLAGS = $(CFLAGS) -Wno-override-init - -VULKAN_SOURCES = \ - anv_allocator.c \ - anv_cmd_buffer.c \ - anv_batch_chain.c \ - anv_descriptor_set.c \ - anv_device.c \ - anv_dump.c \ - anv_entrypoints.c \ - anv_entrypoints.h \ - anv_formats.c \ - anv_image.c \ - anv_intel.c \ - anv_meta.c \ - anv_meta_blit.c \ - anv_meta_clear.c \ - anv_meta_resolve.c \ - anv_nir_apply_dynamic_offsets.c \ - anv_nir_apply_pipeline_layout.c \ - anv_nir_lower_push_constants.c \ - anv_pass.c \ - anv_pipeline.c \ - anv_pipeline_cache.c \ - anv_private.h \ - anv_query.c \ - anv_util.c \ - anv_wsi.c \ - anv_wsi_x11.c - -BUILT_SOURCES = \ - anv_entrypoints.h \ - anv_entrypoints.c - -libanv_gen7_la_SOURCES = \ - genX_cmd_buffer.c \ - genX_pipeline.c \ - gen7_cmd_buffer.c \ - gen7_pipeline.c \ - gen7_state.c -libanv_gen7_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=70 - -libanv_gen75_la_SOURCES = \ - genX_cmd_buffer.c \ - genX_pipeline.c \ - gen7_cmd_buffer.c \ - gen7_pipeline.c \ - gen7_state.c -libanv_gen75_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=75 - -libanv_gen8_la_SOURCES = \ - genX_cmd_buffer.c \ - genX_pipeline.c \ - gen8_cmd_buffer.c \ - gen8_pipeline.c \ - gen8_state.c -libanv_gen8_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=80 - -libanv_gen9_la_SOURCES = \ - genX_cmd_buffer.c \ - genX_pipeline.c \ - gen8_cmd_buffer.c \ - gen8_pipeline.c \ - gen8_state.c -libanv_gen9_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DANV_GENx10=90 - -if HAVE_EGL_PLATFORM_WAYLAND -BUILT_SOURCES += \ - wayland-drm-protocol.c \ - wayland-drm-client-protocol.h - -%-protocol.c : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml - $(AM_V_GEN)$(WAYLAND_SCANNER) code < $< > $@ - -%-client-protocol.h : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml - $(AM_V_GEN)$(WAYLAND_SCANNER) client-header < $< > $@ - -AM_CPPFLAGS += -I$(top_srcdir)/src/egl/wayland/wayland-drm -VULKAN_SOURCES += \ - wayland-drm-protocol.c \ - anv_wsi_wayland.c -libvulkan_intel_la_CFLAGS += -DHAVE_WAYLAND_PLATFORM -endif - -libvulkan_intel_la_SOURCES = \ - $(VULKAN_SOURCES) \ - anv_gem.c - -anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS) - $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< header > $@ - 
-anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) - $(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< code > $@ - -CLEANFILES = $(BUILT_SOURCES) - -libvulkan_intel_la_LIBADD = $(WAYLAND_LIBS) -lxcb -lxcb-dri3 \ - $(top_builddir)/src/intel/isl/libisl.la \ - $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \ - ../mesa/libmesa.la \ - ../mesa/drivers/dri/common/libdri_test_stubs.la \ - -lpthread -ldl -lstdc++ \ - $(PER_GEN_LIBS) - -libvulkan_intel_la_LDFLAGS = \ - -module -avoid-version -shared -shrext .so - - -# Generate icd files. It would be nice to just be able to add these to -# AC_CONFIG_FILES, but @libdir@ typically expands to '${exec_prefix}/lib64', -# which we can't put in the icd file. When running sed from the Makefile we -# can use ${libdir}, which expands completely and we avoid putting Makefile -# variables in the icd file. - -icdconfdir=$(sysconfdir)/vulkan/icd.d -icdconf_DATA = intel_icd.json -noinst_DATA = dev_icd.json - -%.json : %.json.in - $(AM_V_GEN) $(SED) \ - -e "s#@build_libdir@#${abs_top_builddir}/${LIB_DIR}#" \ - -e "s#@install_libdir@#${libdir}#" < $< > $@ - - -# Libvulkan with dummy gem. Used for unit tests. - -libvulkan_test_la_SOURCES = \ - $(VULKAN_SOURCES) \ - anv_gem_stubs.c - -libvulkan_test_la_CFLAGS = $(libvulkan_la_CFLAGS) -libvulkan_test_la_LIBADD = $(libvulkan_la_LIBADD) - -include $(top_srcdir)/install-lib-links.mk diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c deleted file mode 100644 index a7ae975..0000000 --- a/src/vulkan/anv_allocator.c +++ /dev/null @@ -1,862 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#define _DEFAULT_SOURCE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "anv_private.h" - -#ifdef HAVE_VALGRIND -#define VG_NOACCESS_READ(__ptr) ({ \ - VALGRIND_MAKE_MEM_DEFINED((__ptr), sizeof(*(__ptr))); \ - __typeof(*(__ptr)) __val = *(__ptr); \ - VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr)));\ - __val; \ -}) -#define VG_NOACCESS_WRITE(__ptr, __val) ({ \ - VALGRIND_MAKE_MEM_UNDEFINED((__ptr), sizeof(*(__ptr))); \ - *(__ptr) = (__val); \ - VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr))); \ -}) -#else -#define VG_NOACCESS_READ(__ptr) (*(__ptr)) -#define VG_NOACCESS_WRITE(__ptr, __val) (*(__ptr) = (__val)) -#endif - -/* Design goals: - * - * - Lock free (except when resizing underlying bos) - * - * - Constant time allocation with typically only one atomic - * - * - Multiple allocation sizes without fragmentation - * - * - Can grow while keeping addresses and offset of contents stable - * - * - All allocations within one bo so we can point one of the - * STATE_BASE_ADDRESS pointers at it. - * - * The overall design is a two-level allocator: top level is a fixed size, big - * block (8k) allocator, which operates out of a bo. Allocation is done by - * either pulling a block from the free list or growing the used range of the - * bo. Growing the range may run out of space in the bo which we then need to - * grow. Growing the bo is tricky in a multi-threaded, lockless environment: - * we need to keep all pointers and contents in the old map valid. GEM bos in - * general can't grow, but we use a trick: we create a memfd and use ftruncate - * to grow it as necessary. We mmap the new size and then create a gem bo for - * it using the new gem userptr ioctl. Without heavy-handed locking around - * our allocation fast-path, there isn't really a way to munmap the old mmap, - * so we just keep it around until garbage collection time. While the block - * allocator is lockless for normal operations, we block other threads trying - * to allocate while we're growing the map. It sholdn't happen often, and - * growing is fast anyway. - * - * At the next level we can use various sub-allocators. The state pool is a - * pool of smaller, fixed size objects, which operates much like the block - * pool. It uses a free list for freeing objects, but when it runs out of - * space it just allocates a new block from the block pool. This allocator is - * intended for longer lived state objects such as SURFACE_STATE and most - * other persistent state objects in the API. We may need to track more info - * with these object and a pointer back to the CPU object (eg VkImage). In - * those cases we just allocate a slightly bigger object and put the extra - * state after the GPU state object. - * - * The state stream allocator works similar to how the i965 DRI driver streams - * all its state. Even with Vulkan, we need to emit transient state (whether - * surface state base or dynamic state base), and for that we can just get a - * block and fill it up. These cases are local to a command buffer and the - * sub-allocator need not be thread safe. The streaming allocator gets a new - * block when it runs out of space and chains them together so they can be - * easily freed. - */ - -/* Allocations are always at least 64 byte aligned, so 1 is an invalid value. - * We use it to indicate the free list is empty. 
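As a standalone illustration (not one of the moved files), here is a minimal sketch of the memfd/ftruncate/mmap trick the design comment above relies on, assuming a glibc new enough to expose memfd_create(2) and using made-up sizes. It only shows why pointers into the old mapping stay valid when the pool "grows"; the userptr step that hands the pages to the GPU is omitted.

/* Standalone sketch: a file-backed pool that grows without invalidating
 * existing pointers.  Sizes here are illustrative, not the driver's values. */
#define _GNU_SOURCE
#include <assert.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
   const size_t reserve = 1ull << 20;   /* stand-in for the real 2 GB reservation */
   int fd = memfd_create("sketch pool", MFD_CLOEXEC);
   assert(fd >= 0);
   assert(ftruncate(fd, reserve) == 0);

   /* First "grow": map a small window of the file and use it. */
   size_t size = 4096;
   char *map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
   assert(map != MAP_FAILED);
   map[0] = 42;

   /* Second "grow": map a larger window of the same pages.  The old mapping
    * is simply leaked until teardown, so pointers into it keep working. */
   char *new_map = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
   assert(new_map != MAP_FAILED);
   printf("old map sees %d, new map sees %d\n", map[0], new_map[0]);

   /* A real driver would now register new_map with the GPU (userptr);
    * that step is omitted here. */
   close(fd);
   return 0;
}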
*/ -#define EMPTY 1 - -struct anv_mmap_cleanup { - void *map; - size_t size; - uint32_t gem_handle; -}; - -#define ANV_MMAP_CLEANUP_INIT ((struct anv_mmap_cleanup){0}) - -static inline long -sys_futex(void *addr1, int op, int val1, - struct timespec *timeout, void *addr2, int val3) -{ - return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3); -} - -static inline int -futex_wake(uint32_t *addr, int count) -{ - return sys_futex(addr, FUTEX_WAKE, count, NULL, NULL, 0); -} - -static inline int -futex_wait(uint32_t *addr, int32_t value) -{ - return sys_futex(addr, FUTEX_WAIT, value, NULL, NULL, 0); -} - -static inline int -memfd_create(const char *name, unsigned int flags) -{ - return syscall(SYS_memfd_create, name, flags); -} - -static inline uint32_t -ilog2_round_up(uint32_t value) -{ - assert(value != 0); - return 32 - __builtin_clz(value - 1); -} - -static inline uint32_t -round_to_power_of_two(uint32_t value) -{ - return 1 << ilog2_round_up(value); -} - -static bool -anv_free_list_pop(union anv_free_list *list, void **map, int32_t *offset) -{ - union anv_free_list current, new, old; - - current.u64 = list->u64; - while (current.offset != EMPTY) { - /* We have to add a memory barrier here so that the list head (and - * offset) gets read before we read the map pointer. This way we - * know that the map pointer is valid for the given offset at the - * point where we read it. - */ - __sync_synchronize(); - - int32_t *next_ptr = *map + current.offset; - new.offset = VG_NOACCESS_READ(next_ptr); - new.count = current.count + 1; - old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); - if (old.u64 == current.u64) { - *offset = current.offset; - return true; - } - current = old; - } - - return false; -} - -static void -anv_free_list_push(union anv_free_list *list, void *map, int32_t offset) -{ - union anv_free_list current, old, new; - int32_t *next_ptr = map + offset; - - old = *list; - do { - current = old; - VG_NOACCESS_WRITE(next_ptr, current.offset); - new.offset = offset; - new.count = current.count + 1; - old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); - } while (old.u64 != current.u64); -} - -/* All pointers in the ptr_free_list are assumed to be page-aligned. This - * means that the bottom 12 bits should all be zero. 
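The page-alignment observation above is what lets the PFL_* macros defined just below hide a small generation counter in the low 12 bits of each pointer, the same pointer-plus-counter shape the free list uses. A worked example with a made-up (but 4 KiB aligned) address, mirroring those macros:

#include <assert.h>
#include <stdint.h>

#define PFL_COUNT(x) ((uintptr_t)(x) & 0xfff)
#define PFL_PTR(x)   ((void *)((uintptr_t)(x) & ~(uintptr_t)0xfff))

int main(void)
{
   void *page = (void *)(uintptr_t)0x42000;          /* any 4 KiB aligned address */
   void *packed = (void *)((uintptr_t)page | (4097 & 0xfff));

   assert(PFL_PTR(packed) == page);                  /* low bits mask right off  */
   assert(PFL_COUNT(packed) == 1);                   /* 4097 wraps to 1 mod 4096 */
   return 0;
}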
- */ -#define PFL_COUNT(x) ((uintptr_t)(x) & 0xfff) -#define PFL_PTR(x) ((void *)((uintptr_t)(x) & ~0xfff)) -#define PFL_PACK(ptr, count) ({ \ - assert(((uintptr_t)(ptr) & 0xfff) == 0); \ - (void *)((uintptr_t)(ptr) | (uintptr_t)((count) & 0xfff)); \ -}) - -static bool -anv_ptr_free_list_pop(void **list, void **elem) -{ - void *current = *list; - while (PFL_PTR(current) != NULL) { - void **next_ptr = PFL_PTR(current); - void *new_ptr = VG_NOACCESS_READ(next_ptr); - unsigned new_count = PFL_COUNT(current) + 1; - void *new = PFL_PACK(new_ptr, new_count); - void *old = __sync_val_compare_and_swap(list, current, new); - if (old == current) { - *elem = PFL_PTR(current); - return true; - } - current = old; - } - - return false; -} - -static void -anv_ptr_free_list_push(void **list, void *elem) -{ - void *old, *current; - void **next_ptr = elem; - - old = *list; - do { - current = old; - VG_NOACCESS_WRITE(next_ptr, PFL_PTR(current)); - unsigned new_count = PFL_COUNT(current) + 1; - void *new = PFL_PACK(elem, new_count); - old = __sync_val_compare_and_swap(list, current, new); - } while (old != current); -} - -static uint32_t -anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state); - -void -anv_block_pool_init(struct anv_block_pool *pool, - struct anv_device *device, uint32_t block_size) -{ - assert(util_is_power_of_two(block_size)); - - pool->device = device; - pool->bo.gem_handle = 0; - pool->bo.offset = 0; - pool->bo.size = 0; - pool->block_size = block_size; - pool->free_list = ANV_FREE_LIST_EMPTY; - pool->back_free_list = ANV_FREE_LIST_EMPTY; - - pool->fd = memfd_create("block pool", MFD_CLOEXEC); - if (pool->fd == -1) - return; - - /* Just make it 2GB up-front. The Linux kernel won't actually back it - * with pages until we either map and fault on one of them or we use - * userptr and send a chunk of it off to the GPU. - */ - if (ftruncate(pool->fd, BLOCK_POOL_MEMFD_SIZE) == -1) - return; - - anv_vector_init(&pool->mmap_cleanups, - round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128); - - pool->state.next = 0; - pool->state.end = 0; - pool->back_state.next = 0; - pool->back_state.end = 0; - - /* Immediately grow the pool so we'll have a backing bo. */ - pool->state.end = anv_block_pool_grow(pool, &pool->state); -} - -void -anv_block_pool_finish(struct anv_block_pool *pool) -{ - struct anv_mmap_cleanup *cleanup; - - anv_vector_foreach(cleanup, &pool->mmap_cleanups) { - if (cleanup->map) - munmap(cleanup->map, cleanup->size); - if (cleanup->gem_handle) - anv_gem_close(pool->device, cleanup->gem_handle); - } - - anv_vector_finish(&pool->mmap_cleanups); - - close(pool->fd); -} - -#define PAGE_SIZE 4096 - -/** Grows and re-centers the block pool. - * - * We grow the block pool in one or both directions in such a way that the - * following conditions are met: - * - * 1) The size of the entire pool is always a power of two. - * - * 2) The pool only grows on both ends. Neither end can get - * shortened. - * - * 3) At the end of the allocation, we have about twice as much space - * allocated for each end as we have used. This way the pool doesn't - * grow too far in one direction or the other. - * - * 4) If the _alloc_back() has never been called, then the back portion of - * the pool retains a size of zero. (This makes it easier for users of - * the block pool that only want a one-sided pool.) - * - * 5) We have enough space allocated for at least one more block in - * whichever side `state` points to. 
- * - * 6) The center of the pool is always aligned to both the block_size of - * the pool and a 4K CPU page. - */ -static uint32_t -anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state) -{ - size_t size; - void *map; - uint32_t gem_handle; - struct anv_mmap_cleanup *cleanup; - - pthread_mutex_lock(&pool->device->mutex); - - assert(state == &pool->state || state == &pool->back_state); - - /* Gather a little usage information on the pool. Since we may have - * threadsd waiting in queue to get some storage while we resize, it's - * actually possible that total_used will be larger than old_size. In - * particular, block_pool_alloc() increments state->next prior to - * calling block_pool_grow, so this ensures that we get enough space for - * which ever side tries to grow the pool. - * - * We align to a page size because it makes it easier to do our - * calculations later in such a way that we state page-aigned. - */ - uint32_t back_used = align_u32(pool->back_state.next, PAGE_SIZE); - uint32_t front_used = align_u32(pool->state.next, PAGE_SIZE); - uint32_t total_used = front_used + back_used; - - assert(state == &pool->state || back_used > 0); - - size_t old_size = pool->bo.size; - - if (old_size != 0 && - back_used * 2 <= pool->center_bo_offset && - front_used * 2 <= (old_size - pool->center_bo_offset)) { - /* If we're in this case then this isn't the firsta allocation and we - * already have enough space on both sides to hold double what we - * have allocated. There's nothing for us to do. - */ - goto done; - } - - if (old_size == 0) { - /* This is the first allocation */ - size = MAX2(32 * pool->block_size, PAGE_SIZE); - } else { - size = old_size * 2; - } - - /* We can't have a block pool bigger than 1GB because we use signed - * 32-bit offsets in the free list and we don't want overflow. We - * should never need a block pool bigger than 1GB anyway. - */ - assert(size <= (1u << 31)); - - /* We compute a new center_bo_offset such that, when we double the size - * of the pool, we maintain the ratio of how much is used by each side. - * This way things should remain more-or-less balanced. - */ - uint32_t center_bo_offset; - if (back_used == 0) { - /* If we're in this case then we have never called alloc_back(). In - * this case, we want keep the offset at 0 to make things as simple - * as possible for users that don't care about back allocations. - */ - center_bo_offset = 0; - } else { - /* Try to "center" the allocation based on how much is currently in - * use on each side of the center line. 
- */ - center_bo_offset = ((uint64_t)size * back_used) / total_used; - - /* Align down to a multiple of both the block size and page size */ - uint32_t granularity = MAX2(pool->block_size, PAGE_SIZE); - assert(util_is_power_of_two(granularity)); - center_bo_offset &= ~(granularity - 1); - - assert(center_bo_offset >= back_used); - - /* Make sure we don't shrink the back end of the pool */ - if (center_bo_offset < pool->back_state.end) - center_bo_offset = pool->back_state.end; - - /* Make sure that we don't shrink the front end of the pool */ - if (size - center_bo_offset < pool->state.end) - center_bo_offset = size - pool->state.end; - } - - assert(center_bo_offset % pool->block_size == 0); - assert(center_bo_offset % PAGE_SIZE == 0); - - /* Assert that we only ever grow the pool */ - assert(center_bo_offset >= pool->back_state.end); - assert(size - center_bo_offset >= pool->state.end); - - cleanup = anv_vector_add(&pool->mmap_cleanups); - if (!cleanup) - goto fail; - *cleanup = ANV_MMAP_CLEANUP_INIT; - - /* Just leak the old map until we destroy the pool. We can't munmap it - * without races or imposing locking on the block allocate fast path. On - * the whole the leaked maps adds up to less than the size of the - * current map. MAP_POPULATE seems like the right thing to do, but we - * should try to get some numbers. - */ - map = mmap(NULL, size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, pool->fd, - BLOCK_POOL_MEMFD_CENTER - center_bo_offset); - cleanup->map = map; - cleanup->size = size; - - if (map == MAP_FAILED) - goto fail; - - gem_handle = anv_gem_userptr(pool->device, map, size); - if (gem_handle == 0) - goto fail; - cleanup->gem_handle = gem_handle; - -#if 0 - /* Regular objects are created I915_CACHING_CACHED on LLC platforms and - * I915_CACHING_NONE on non-LLC platforms. However, userptr objects are - * always created as I915_CACHING_CACHED, which on non-LLC means - * snooped. That can be useful but comes with a bit of overheard. Since - * we're eplicitly clflushing and don't want the overhead we need to turn - * it off. */ - if (!pool->device->info.has_llc) { - anv_gem_set_caching(pool->device, gem_handle, I915_CACHING_NONE); - anv_gem_set_domain(pool->device, gem_handle, - I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); - } -#endif - - /* Now that we successfull allocated everything, we can write the new - * values back into pool. */ - pool->map = map + center_bo_offset; - pool->center_bo_offset = center_bo_offset; - pool->bo.gem_handle = gem_handle; - pool->bo.size = size; - pool->bo.map = map; - pool->bo.index = 0; - -done: - pthread_mutex_unlock(&pool->device->mutex); - - /* Return the appropreate new size. This function never actually - * updates state->next. Instead, we let the caller do that because it - * needs to do so in order to maintain its concurrency model. - */ - if (state == &pool->state) { - return pool->bo.size - pool->center_bo_offset; - } else { - assert(pool->center_bo_offset > 0); - return pool->center_bo_offset; - } - -fail: - pthread_mutex_unlock(&pool->device->mutex); - - return 0; -} - -static uint32_t -anv_block_pool_alloc_new(struct anv_block_pool *pool, - struct anv_block_state *pool_state) -{ - struct anv_block_state state, old, new; - - while (1) { - state.u64 = __sync_fetch_and_add(&pool_state->u64, pool->block_size); - if (state.next < state.end) { - assert(pool->map); - return state.next; - } else if (state.next == state.end) { - /* We allocated the first block outside the pool, we have to grow it. 
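Plugging hypothetical numbers into the re-centering formula above makes goal (3) concrete: after the resize, each side owns roughly twice what it currently uses.

#include <assert.h>
#include <stdint.h>

int main(void)
{
   /* Made-up numbers: the pool doubles to 64 KiB while 8 KiB of the back side
    * and 24 KiB of the front side are in use (32 KiB total). */
   uint32_t size = 64 * 1024, back_used = 8 * 1024, total_used = 32 * 1024;
   uint32_t granularity = 8 * 1024;    /* stand-in for MAX2(block_size, PAGE_SIZE) */

   uint32_t center = ((uint64_t)size * back_used) / total_used;
   center &= ~(granularity - 1);       /* align down to block/page granularity */

   assert(center == 16 * 1024);        /* back side: 16 KiB for 8 KiB used (2x)   */
   assert(size - center == 48 * 1024); /* front side: 48 KiB for 24 KiB used (2x) */
   return 0;
}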
- * pool_state->next acts a mutex: threads who try to allocate now will - * get block indexes above the current limit and hit futex_wait - * below. */ - new.next = state.next + pool->block_size; - new.end = anv_block_pool_grow(pool, pool_state); - assert(new.end >= new.next && new.end % pool->block_size == 0); - old.u64 = __sync_lock_test_and_set(&pool_state->u64, new.u64); - if (old.next != state.next) - futex_wake(&pool_state->end, INT_MAX); - return state.next; - } else { - futex_wait(&pool_state->end, state.end); - continue; - } - } -} - -int32_t -anv_block_pool_alloc(struct anv_block_pool *pool) -{ - int32_t offset; - - /* Try free list first. */ - if (anv_free_list_pop(&pool->free_list, &pool->map, &offset)) { - assert(offset >= 0); - assert(pool->map); - return offset; - } - - return anv_block_pool_alloc_new(pool, &pool->state); -} - -/* Allocates a block out of the back of the block pool. - * - * This will allocated a block earlier than the "start" of the block pool. - * The offsets returned from this function will be negative but will still - * be correct relative to the block pool's map pointer. - * - * If you ever use anv_block_pool_alloc_back, then you will have to do - * gymnastics with the block pool's BO when doing relocations. - */ -int32_t -anv_block_pool_alloc_back(struct anv_block_pool *pool) -{ - int32_t offset; - - /* Try free list first. */ - if (anv_free_list_pop(&pool->back_free_list, &pool->map, &offset)) { - assert(offset < 0); - assert(pool->map); - return offset; - } - - offset = anv_block_pool_alloc_new(pool, &pool->back_state); - - /* The offset we get out of anv_block_pool_alloc_new() is actually the - * number of bytes downwards from the middle to the end of the block. - * We need to turn it into a (negative) offset from the middle to the - * start of the block. - */ - assert(offset >= 0); - return -(offset + pool->block_size); -} - -void -anv_block_pool_free(struct anv_block_pool *pool, int32_t offset) -{ - if (offset < 0) { - anv_free_list_push(&pool->back_free_list, pool->map, offset); - } else { - anv_free_list_push(&pool->free_list, pool->map, offset); - } -} - -static void -anv_fixed_size_state_pool_init(struct anv_fixed_size_state_pool *pool, - size_t state_size) -{ - /* At least a cache line and must divide the block size. */ - assert(state_size >= 64 && util_is_power_of_two(state_size)); - - pool->state_size = state_size; - pool->free_list = ANV_FREE_LIST_EMPTY; - pool->block.next = 0; - pool->block.end = 0; -} - -static uint32_t -anv_fixed_size_state_pool_alloc(struct anv_fixed_size_state_pool *pool, - struct anv_block_pool *block_pool) -{ - int32_t offset; - struct anv_block_state block, old, new; - - /* Try free list first. 
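The sign flip in anv_block_pool_alloc_back() is easier to see with concrete numbers; a block_size of 4096 is assumed here purely for illustration.

#include <assert.h>
#include <stdint.h>

static int32_t back_offset_from_state(uint32_t state_next, uint32_t block_size)
{
   /* state_next is what the fetch-and-add in alloc_new() returned: bytes
    * counted downward from the center.  Flip it into a negative offset that
    * points at the start of the block, relative to pool->map. */
   return -(int32_t)(state_next + block_size);
}

int main(void)
{
   const uint32_t block_size = 4096;
   /* First back allocation covers [-4096, 0) relative to the center. */
   assert(back_offset_from_state(0, block_size) == -4096);
   /* Second back allocation covers [-8192, -4096). */
   assert(back_offset_from_state(4096, block_size) == -8192);
   return 0;
}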
*/ - if (anv_free_list_pop(&pool->free_list, &block_pool->map, &offset)) { - assert(offset >= 0); - return offset; - } - - /* If free list was empty (or somebody raced us and took the items) we - * allocate a new item from the end of the block */ - restart: - block.u64 = __sync_fetch_and_add(&pool->block.u64, pool->state_size); - - if (block.next < block.end) { - return block.next; - } else if (block.next == block.end) { - offset = anv_block_pool_alloc(block_pool); - new.next = offset + pool->state_size; - new.end = offset + block_pool->block_size; - old.u64 = __sync_lock_test_and_set(&pool->block.u64, new.u64); - if (old.next != block.next) - futex_wake(&pool->block.end, INT_MAX); - return offset; - } else { - futex_wait(&pool->block.end, block.end); - goto restart; - } -} - -static void -anv_fixed_size_state_pool_free(struct anv_fixed_size_state_pool *pool, - struct anv_block_pool *block_pool, - uint32_t offset) -{ - anv_free_list_push(&pool->free_list, block_pool->map, offset); -} - -void -anv_state_pool_init(struct anv_state_pool *pool, - struct anv_block_pool *block_pool) -{ - pool->block_pool = block_pool; - for (unsigned i = 0; i < ANV_STATE_BUCKETS; i++) { - size_t size = 1 << (ANV_MIN_STATE_SIZE_LOG2 + i); - anv_fixed_size_state_pool_init(&pool->buckets[i], size); - } - VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false)); -} - -void -anv_state_pool_finish(struct anv_state_pool *pool) -{ - VG(VALGRIND_DESTROY_MEMPOOL(pool)); -} - -struct anv_state -anv_state_pool_alloc(struct anv_state_pool *pool, size_t size, size_t align) -{ - unsigned size_log2 = ilog2_round_up(size < align ? align : size); - assert(size_log2 <= ANV_MAX_STATE_SIZE_LOG2); - if (size_log2 < ANV_MIN_STATE_SIZE_LOG2) - size_log2 = ANV_MIN_STATE_SIZE_LOG2; - unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; - - struct anv_state state; - state.alloc_size = 1 << size_log2; - state.offset = anv_fixed_size_state_pool_alloc(&pool->buckets[bucket], - pool->block_pool); - state.map = pool->block_pool->map + state.offset; - VG(VALGRIND_MEMPOOL_ALLOC(pool, state.map, size)); - return state; -} - -void -anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state) -{ - assert(util_is_power_of_two(state.alloc_size)); - unsigned size_log2 = ilog2_round_up(state.alloc_size); - assert(size_log2 >= ANV_MIN_STATE_SIZE_LOG2 && - size_log2 <= ANV_MAX_STATE_SIZE_LOG2); - unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; - - VG(VALGRIND_MEMPOOL_FREE(pool, state.map)); - anv_fixed_size_state_pool_free(&pool->buckets[bucket], - pool->block_pool, state.offset); -} - -#define NULL_BLOCK 1 -struct anv_state_stream_block { - /* The next block */ - struct anv_state_stream_block *next; - - /* The offset into the block pool at which this block starts */ - uint32_t offset; - -#ifdef HAVE_VALGRIND - /* A pointer to the first user-allocated thing in this block. This is - * what valgrind sees as the start of the block. - */ - void *_vg_ptr; -#endif -}; - -/* The state stream allocator is a one-shot, single threaded allocator for - * variable sized blocks. We use it for allocating dynamic state. - */ -void -anv_state_stream_init(struct anv_state_stream *stream, - struct anv_block_pool *block_pool) -{ - stream->block_pool = block_pool; - stream->block = NULL; - - /* Ensure that next + whatever > end. This way the first call to - * state_stream_alloc fetches a new block. 
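A worked example of the bucket selection in anv_state_pool_alloc(), assuming ANV_MIN_STATE_SIZE_LOG2 is 6 so the smallest bucket holds the 64-byte states the assertion above requires.

#include <assert.h>
#include <stdint.h>

#define MIN_LOG2 6   /* assumed value of ANV_MIN_STATE_SIZE_LOG2 */

static unsigned bucket_for(uint32_t size, uint32_t align)
{
   uint32_t val = size < align ? align : size;
   unsigned size_log2 = 32 - __builtin_clz(val - 1);   /* ilog2_round_up */
   if (size_log2 < MIN_LOG2)
      size_log2 = MIN_LOG2;
   return size_log2 - MIN_LOG2;
}

int main(void)
{
   assert(bucket_for(16, 16) == 0);    /* rounds up to a 64-byte state, bucket 0 */
   assert(bucket_for(64, 64) == 0);
   assert(bucket_for(96, 64) == 1);    /* rounds up to 128 bytes, bucket 1 */
   assert(bucket_for(256, 64) == 2);   /* 256-byte states, bucket 2 */
   return 0;
}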
- */ - stream->next = 1; - stream->end = 0; - - VG(VALGRIND_CREATE_MEMPOOL(stream, 0, false)); -} - -void -anv_state_stream_finish(struct anv_state_stream *stream) -{ - VG(const uint32_t block_size = stream->block_pool->block_size); - - struct anv_state_stream_block *next = stream->block; - while (next != NULL) { - VG(VALGRIND_MAKE_MEM_DEFINED(next, sizeof(*next))); - struct anv_state_stream_block sb = VG_NOACCESS_READ(next); - VG(VALGRIND_MEMPOOL_FREE(stream, sb._vg_ptr)); - VG(VALGRIND_MAKE_MEM_UNDEFINED(next, block_size)); - anv_block_pool_free(stream->block_pool, sb.offset); - next = sb.next; - } - - VG(VALGRIND_DESTROY_MEMPOOL(stream)); -} - -struct anv_state -anv_state_stream_alloc(struct anv_state_stream *stream, - uint32_t size, uint32_t alignment) -{ - struct anv_state_stream_block *sb = stream->block; - - struct anv_state state; - - state.offset = align_u32(stream->next, alignment); - if (state.offset + size > stream->end) { - uint32_t block = anv_block_pool_alloc(stream->block_pool); - sb = stream->block_pool->map + block; - - VG(VALGRIND_MAKE_MEM_UNDEFINED(sb, sizeof(*sb))); - sb->next = stream->block; - sb->offset = block; - VG(sb->_vg_ptr = NULL); - VG(VALGRIND_MAKE_MEM_NOACCESS(sb, stream->block_pool->block_size)); - - stream->block = sb; - stream->start = block; - stream->next = block + sizeof(*sb); - stream->end = block + stream->block_pool->block_size; - - state.offset = align_u32(stream->next, alignment); - assert(state.offset + size <= stream->end); - } - - assert(state.offset > stream->start); - state.map = (void *)sb + (state.offset - stream->start); - state.alloc_size = size; - -#ifdef HAVE_VALGRIND - void *vg_ptr = VG_NOACCESS_READ(&sb->_vg_ptr); - if (vg_ptr == NULL) { - vg_ptr = state.map; - VG_NOACCESS_WRITE(&sb->_vg_ptr, vg_ptr); - VALGRIND_MEMPOOL_ALLOC(stream, vg_ptr, size); - } else { - void *state_end = state.map + state.alloc_size; - /* This only updates the mempool. The newly allocated chunk is still - * marked as NOACCESS. 
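The "next = 1, end = 0" sentinel set just above guarantees the very first allocation takes the new-block path, since any aligned offset is at least 1 and therefore past end; a tiny illustration with assumed alignment and size values:

#include <assert.h>
#include <stdint.h>

static uint32_t align_u32(uint32_t v, uint32_t a) { return (v + a - 1) & ~(a - 1); }

int main(void)
{
   uint32_t next = 1, end = 0;                 /* the sentinel state */
   uint32_t offset = align_u32(next, 64);      /* 64 */
   assert(offset + 16 > end);                  /* always true: fetch a fresh block */
   return 0;
}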
*/ - VALGRIND_MEMPOOL_CHANGE(stream, vg_ptr, vg_ptr, state_end - vg_ptr); - /* Mark the newly allocated chunk as undefined */ - VALGRIND_MAKE_MEM_UNDEFINED(state.map, state.alloc_size); - } -#endif - - stream->next = state.offset + size; - - return state; -} - -struct bo_pool_bo_link { - struct bo_pool_bo_link *next; - struct anv_bo bo; -}; - -void -anv_bo_pool_init(struct anv_bo_pool *pool, - struct anv_device *device, uint32_t bo_size) -{ - pool->device = device; - pool->bo_size = bo_size; - pool->free_list = NULL; - - VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false)); -} - -void -anv_bo_pool_finish(struct anv_bo_pool *pool) -{ - struct bo_pool_bo_link *link = PFL_PTR(pool->free_list); - while (link != NULL) { - struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link); - - anv_gem_munmap(link_copy.bo.map, pool->bo_size); - anv_gem_close(pool->device, link_copy.bo.gem_handle); - link = link_copy.next; - } - - VG(VALGRIND_DESTROY_MEMPOOL(pool)); -} - -VkResult -anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo) -{ - VkResult result; - - void *next_free_void; - if (anv_ptr_free_list_pop(&pool->free_list, &next_free_void)) { - struct bo_pool_bo_link *next_free = next_free_void; - *bo = VG_NOACCESS_READ(&next_free->bo); - assert(bo->map == next_free); - assert(bo->size == pool->bo_size); - - VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, pool->bo_size)); - - return VK_SUCCESS; - } - - struct anv_bo new_bo; - - result = anv_bo_init_new(&new_bo, pool->device, pool->bo_size); - if (result != VK_SUCCESS) - return result; - - assert(new_bo.size == pool->bo_size); - - new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pool->bo_size, 0); - if (new_bo.map == NULL) { - anv_gem_close(pool->device, new_bo.gem_handle); - return vk_error(VK_ERROR_MEMORY_MAP_FAILED); - } - - *bo = new_bo; - - VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, pool->bo_size)); - - return VK_SUCCESS; -} - -void -anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo) -{ - struct bo_pool_bo_link *link = bo->map; - link->bo = *bo; - - VG(VALGRIND_MEMPOOL_FREE(pool, bo->map)); - anv_ptr_free_list_push(&pool->free_list, link); -} diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c deleted file mode 100644 index d24dd06..0000000 --- a/src/vulkan/anv_batch_chain.c +++ /dev/null @@ -1,1077 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
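anv_bo_pool (just above the deleted anv_batch_chain.c header) stores its free list inside the freed buffers themselves: the buffer's mapping doubles as the list node and carries a copy of the bo, so popping the list hands back a ready-to-use mapping. A single-threaded, malloc-backed sketch of that shape with hypothetical names; the real pool layers the lock-free ptr_free_list from anv_allocator.c on top.

#include <assert.h>
#include <stdlib.h>

struct fake_bo { void *map; size_t size; };
struct link { struct link *next; struct fake_bo bo; };

static struct link *free_list;

static void pool_free(const struct fake_bo *bo)
{
   struct link *l = bo->map;        /* reuse the buffer itself as the list node */
   l->bo = *bo;
   l->next = free_list;
   free_list = l;
}

static int pool_alloc(struct fake_bo *bo, size_t size)
{
   if (free_list) {
      *bo = free_list->bo;          /* the copy stored inside the freed buffer */
      free_list = free_list->next;
      return 1;                     /* reused */
   }
   bo->map = malloc(size);
   bo->size = size;
   return 0;                        /* freshly allocated */
}

int main(void)
{
   struct fake_bo a, b;
   pool_alloc(&a, 4096);
   pool_free(&a);
   assert(pool_alloc(&b, 4096) == 1 && b.map == a.map);  /* same buffer comes back */
   free(b.map);
   return 0;
}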
- */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -#include "genxml/gen7_pack.h" -#include "genxml/gen8_pack.h" - -/** \file anv_batch_chain.c - * - * This file contains functions related to anv_cmd_buffer as a data - * structure. This involves everything required to create and destroy - * the actual batch buffers as well as link them together and handle - * relocations and surface state. It specifically does *not* contain any - * handling of actual vkCmd calls beyond vkCmdExecuteCommands. - */ - -/*-----------------------------------------------------------------------* - * Functions related to anv_reloc_list - *-----------------------------------------------------------------------*/ - -static VkResult -anv_reloc_list_init_clone(struct anv_reloc_list *list, - const VkAllocationCallbacks *alloc, - const struct anv_reloc_list *other_list) -{ - if (other_list) { - list->num_relocs = other_list->num_relocs; - list->array_length = other_list->array_length; - } else { - list->num_relocs = 0; - list->array_length = 256; - } - - list->relocs = - anv_alloc(alloc, list->array_length * sizeof(*list->relocs), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - - if (list->relocs == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - list->reloc_bos = - anv_alloc(alloc, list->array_length * sizeof(*list->reloc_bos), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - - if (list->reloc_bos == NULL) { - anv_free(alloc, list->relocs); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - if (other_list) { - memcpy(list->relocs, other_list->relocs, - list->array_length * sizeof(*list->relocs)); - memcpy(list->reloc_bos, other_list->reloc_bos, - list->array_length * sizeof(*list->reloc_bos)); - } - - return VK_SUCCESS; -} - -VkResult -anv_reloc_list_init(struct anv_reloc_list *list, - const VkAllocationCallbacks *alloc) -{ - return anv_reloc_list_init_clone(list, alloc, NULL); -} - -void -anv_reloc_list_finish(struct anv_reloc_list *list, - const VkAllocationCallbacks *alloc) -{ - anv_free(alloc, list->relocs); - anv_free(alloc, list->reloc_bos); -} - -static VkResult -anv_reloc_list_grow(struct anv_reloc_list *list, - const VkAllocationCallbacks *alloc, - size_t num_additional_relocs) -{ - if (list->num_relocs + num_additional_relocs <= list->array_length) - return VK_SUCCESS; - - size_t new_length = list->array_length * 2; - while (new_length < list->num_relocs + num_additional_relocs) - new_length *= 2; - - struct drm_i915_gem_relocation_entry *new_relocs = - anv_alloc(alloc, new_length * sizeof(*list->relocs), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (new_relocs == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct anv_bo **new_reloc_bos = - anv_alloc(alloc, new_length * sizeof(*list->reloc_bos), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (new_relocs == NULL) { - anv_free(alloc, new_relocs); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs)); - memcpy(new_reloc_bos, list->reloc_bos, - list->num_relocs * sizeof(*list->reloc_bos)); - - anv_free(alloc, list->relocs); - anv_free(alloc, list->reloc_bos); - - list->array_length = new_length; - list->relocs = new_relocs; - list->reloc_bos = new_reloc_bos; - - return VK_SUCCESS; -} - -uint64_t -anv_reloc_list_add(struct anv_reloc_list *list, - const VkAllocationCallbacks *alloc, - uint32_t offset, struct anv_bo *target_bo, uint32_t delta) -{ - struct drm_i915_gem_relocation_entry *entry; - int index; - - const uint32_t domain = - 
target_bo->is_winsys_bo ? I915_GEM_DOMAIN_RENDER : 0; - - anv_reloc_list_grow(list, alloc, 1); - /* TODO: Handle failure */ - - /* XXX: Can we use I915_EXEC_HANDLE_LUT? */ - index = list->num_relocs++; - list->reloc_bos[index] = target_bo; - entry = &list->relocs[index]; - entry->target_handle = target_bo->gem_handle; - entry->delta = delta; - entry->offset = offset; - entry->presumed_offset = target_bo->offset; - entry->read_domains = domain; - entry->write_domain = domain; - VG(VALGRIND_CHECK_MEM_IS_DEFINED(entry, sizeof(*entry))); - - return target_bo->offset + delta; -} - -static void -anv_reloc_list_append(struct anv_reloc_list *list, - const VkAllocationCallbacks *alloc, - struct anv_reloc_list *other, uint32_t offset) -{ - anv_reloc_list_grow(list, alloc, other->num_relocs); - /* TODO: Handle failure */ - - memcpy(&list->relocs[list->num_relocs], &other->relocs[0], - other->num_relocs * sizeof(other->relocs[0])); - memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0], - other->num_relocs * sizeof(other->reloc_bos[0])); - - for (uint32_t i = 0; i < other->num_relocs; i++) - list->relocs[i + list->num_relocs].offset += offset; - - list->num_relocs += other->num_relocs; -} - -/*-----------------------------------------------------------------------* - * Functions related to anv_batch - *-----------------------------------------------------------------------*/ - -void * -anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords) -{ - if (batch->next + num_dwords * 4 > batch->end) - batch->extend_cb(batch, batch->user_data); - - void *p = batch->next; - - batch->next += num_dwords * 4; - assert(batch->next <= batch->end); - - return p; -} - -uint64_t -anv_batch_emit_reloc(struct anv_batch *batch, - void *location, struct anv_bo *bo, uint32_t delta) -{ - return anv_reloc_list_add(batch->relocs, batch->alloc, - location - batch->start, bo, delta); -} - -void -anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) -{ - uint32_t size, offset; - - size = other->next - other->start; - assert(size % 4 == 0); - - if (batch->next + size > batch->end) - batch->extend_cb(batch, batch->user_data); - - assert(batch->next + size <= batch->end); - - VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size)); - memcpy(batch->next, other->start, size); - - offset = batch->next - batch->start; - anv_reloc_list_append(batch->relocs, batch->alloc, - other->relocs, offset); - - batch->next += size; -} - -/*-----------------------------------------------------------------------* - * Functions related to anv_batch_bo - *-----------------------------------------------------------------------*/ - -static VkResult -anv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer, - struct anv_batch_bo **bbo_out) -{ - VkResult result; - - struct anv_batch_bo *bbo = anv_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo), - 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (bbo == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo); - if (result != VK_SUCCESS) - goto fail_alloc; - - result = anv_reloc_list_init(&bbo->relocs, &cmd_buffer->pool->alloc); - if (result != VK_SUCCESS) - goto fail_bo_alloc; - - *bbo_out = bbo; - - return VK_SUCCESS; - - fail_bo_alloc: - anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); - fail_alloc: - anv_free(&cmd_buffer->pool->alloc, bbo); - - return result; -} - -static VkResult -anv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer, - const struct anv_batch_bo *other_bbo, - struct 
anv_batch_bo **bbo_out) -{ - VkResult result; - - struct anv_batch_bo *bbo = anv_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo), - 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (bbo == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo); - if (result != VK_SUCCESS) - goto fail_alloc; - - result = anv_reloc_list_init_clone(&bbo->relocs, &cmd_buffer->pool->alloc, - &other_bbo->relocs); - if (result != VK_SUCCESS) - goto fail_bo_alloc; - - bbo->length = other_bbo->length; - memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length); - - bbo->last_ss_pool_bo_offset = other_bbo->last_ss_pool_bo_offset; - - *bbo_out = bbo; - - return VK_SUCCESS; - - fail_bo_alloc: - anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); - fail_alloc: - anv_free(&cmd_buffer->pool->alloc, bbo); - - return result; -} - -static void -anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, - size_t batch_padding) -{ - batch->next = batch->start = bbo->bo.map; - batch->end = bbo->bo.map + bbo->bo.size - batch_padding; - batch->relocs = &bbo->relocs; - bbo->last_ss_pool_bo_offset = 0; - bbo->relocs.num_relocs = 0; -} - -static void -anv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch, - size_t batch_padding) -{ - batch->start = bbo->bo.map; - batch->next = bbo->bo.map + bbo->length; - batch->end = bbo->bo.map + bbo->bo.size - batch_padding; - batch->relocs = &bbo->relocs; -} - -static void -anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) -{ - assert(batch->start == bbo->bo.map); - bbo->length = batch->next - batch->start; - VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); -} - -static void -anv_batch_bo_destroy(struct anv_batch_bo *bbo, - struct anv_cmd_buffer *cmd_buffer) -{ - anv_reloc_list_finish(&bbo->relocs, &cmd_buffer->pool->alloc); - anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); - anv_free(&cmd_buffer->pool->alloc, bbo); -} - -static VkResult -anv_batch_bo_list_clone(const struct list_head *list, - struct anv_cmd_buffer *cmd_buffer, - struct list_head *new_list) -{ - VkResult result = VK_SUCCESS; - - list_inithead(new_list); - - struct anv_batch_bo *prev_bbo = NULL; - list_for_each_entry(struct anv_batch_bo, bbo, list, link) { - struct anv_batch_bo *new_bbo = NULL; - result = anv_batch_bo_clone(cmd_buffer, bbo, &new_bbo); - if (result != VK_SUCCESS) - break; - list_addtail(&new_bbo->link, new_list); - - if (prev_bbo) { - /* As we clone this list of batch_bo's, they chain one to the - * other using MI_BATCH_BUFFER_START commands. We need to fix up - * those relocations as we go. Fortunately, this is pretty easy - * as it will always be the last relocation in the list. 
- */ - uint32_t last_idx = prev_bbo->relocs.num_relocs - 1; - assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo); - prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo; - } - - prev_bbo = new_bbo; - } - - if (result != VK_SUCCESS) { - list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link) - anv_batch_bo_destroy(bbo, cmd_buffer); - } - - return result; -} - -/*-----------------------------------------------------------------------* - * Functions related to anv_batch_bo - *-----------------------------------------------------------------------*/ - -static inline struct anv_batch_bo * -anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer) -{ - return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link); -} - -struct anv_address -anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer) -{ - return (struct anv_address) { - .bo = &cmd_buffer->device->surface_state_block_pool.bo, - .offset = *(int32_t *)anv_vector_head(&cmd_buffer->bt_blocks), - }; -} - -static void -emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *bo, uint32_t offset) -{ - /* In gen8+ the address field grew to two dwords to accomodate 48 bit - * offsets. The high 16 bits are in the last dword, so we can use the gen8 - * version in either case, as long as we set the instruction length in the - * header accordingly. This means that we always emit three dwords here - * and all the padding and adjustment we do in this file works for all - * gens. - */ - - const uint32_t gen7_length = - GEN7_MI_BATCH_BUFFER_START_length - GEN7_MI_BATCH_BUFFER_START_length_bias; - const uint32_t gen8_length = - GEN8_MI_BATCH_BUFFER_START_length - GEN8_MI_BATCH_BUFFER_START_length_bias; - - anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START, - .DWordLength = cmd_buffer->device->info.gen < 8 ? - gen7_length : gen8_length, - ._2ndLevelBatchBuffer = _1stlevelbatch, - .AddressSpaceIndicator = ASI_PPGTT, - .BatchBufferStartAddress = { bo, offset }); -} - -static void -cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer, - struct anv_batch_bo *bbo) -{ - struct anv_batch *batch = &cmd_buffer->batch; - struct anv_batch_bo *current_bbo = - anv_cmd_buffer_current_batch_bo(cmd_buffer); - - /* We set the end of the batch a little short so we would be sure we - * have room for the chaining command. Since we're about to emit the - * chaining command, let's set it back where it should go. 
- */ - batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4; - assert(batch->end == current_bbo->bo.map + current_bbo->bo.size); - - emit_batch_buffer_start(cmd_buffer, &bbo->bo, 0); - - anv_batch_bo_finish(current_bbo, batch); -} - -static VkResult -anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) -{ - struct anv_cmd_buffer *cmd_buffer = _data; - struct anv_batch_bo *new_bbo; - - VkResult result = anv_batch_bo_create(cmd_buffer, &new_bbo); - if (result != VK_SUCCESS) - return result; - - struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos); - if (seen_bbo == NULL) { - anv_batch_bo_destroy(new_bbo, cmd_buffer); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - *seen_bbo = new_bbo; - - cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo); - - list_addtail(&new_bbo->link, &cmd_buffer->batch_bos); - - anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4); - - return VK_SUCCESS; -} - -struct anv_state -anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, - uint32_t entries, uint32_t *state_offset) -{ - struct anv_block_pool *block_pool = - &cmd_buffer->device->surface_state_block_pool; - int32_t *bt_block = anv_vector_head(&cmd_buffer->bt_blocks); - struct anv_state state; - - state.alloc_size = align_u32(entries * 4, 32); - - if (cmd_buffer->bt_next + state.alloc_size > block_pool->block_size) - return (struct anv_state) { 0 }; - - state.offset = cmd_buffer->bt_next; - state.map = block_pool->map + *bt_block + state.offset; - - cmd_buffer->bt_next += state.alloc_size; - - assert(*bt_block < 0); - *state_offset = -(*bt_block); - - return state; -} - -struct anv_state -anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer) -{ - return anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); -} - -struct anv_state -anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t size, uint32_t alignment) -{ - return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, - size, alignment); -} - -VkResult -anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_block_pool *block_pool = - &cmd_buffer->device->surface_state_block_pool; - - int32_t *offset = anv_vector_add(&cmd_buffer->bt_blocks); - if (offset == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - *offset = anv_block_pool_alloc_back(block_pool); - cmd_buffer->bt_next = 0; - - return VK_SUCCESS; -} - -VkResult -anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_batch_bo *batch_bo; - VkResult result; - - list_inithead(&cmd_buffer->batch_bos); - - result = anv_batch_bo_create(cmd_buffer, &batch_bo); - if (result != VK_SUCCESS) - return result; - - list_addtail(&batch_bo->link, &cmd_buffer->batch_bos); - - cmd_buffer->batch.alloc = &cmd_buffer->pool->alloc; - cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; - cmd_buffer->batch.user_data = cmd_buffer; - - anv_batch_bo_start(batch_bo, &cmd_buffer->batch, - GEN8_MI_BATCH_BUFFER_START_length * 4); - - int success = anv_vector_init(&cmd_buffer->seen_bbos, - sizeof(struct anv_bo *), - 8 * sizeof(struct anv_bo *)); - if (!success) - goto fail_batch_bo; - - *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = batch_bo; - - success = anv_vector_init(&cmd_buffer->bt_blocks, sizeof(int32_t), - 8 * sizeof(int32_t)); - if (!success) - goto fail_seen_bbos; - - result = anv_reloc_list_init(&cmd_buffer->surface_relocs, - &cmd_buffer->pool->alloc); - if (result != VK_SUCCESS) - goto 
fail_bt_blocks; - - anv_cmd_buffer_new_binding_table_block(cmd_buffer); - - cmd_buffer->execbuf2.objects = NULL; - cmd_buffer->execbuf2.bos = NULL; - cmd_buffer->execbuf2.array_length = 0; - - return VK_SUCCESS; - - fail_bt_blocks: - anv_vector_finish(&cmd_buffer->bt_blocks); - fail_seen_bbos: - anv_vector_finish(&cmd_buffer->seen_bbos); - fail_batch_bo: - anv_batch_bo_destroy(batch_bo, cmd_buffer); - - return result; -} - -void -anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) -{ - int32_t *bt_block; - anv_vector_foreach(bt_block, &cmd_buffer->bt_blocks) { - anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool, - *bt_block); - } - anv_vector_finish(&cmd_buffer->bt_blocks); - - anv_reloc_list_finish(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc); - - anv_vector_finish(&cmd_buffer->seen_bbos); - - /* Destroy all of the batch buffers */ - list_for_each_entry_safe(struct anv_batch_bo, bbo, - &cmd_buffer->batch_bos, link) { - anv_batch_bo_destroy(bbo, cmd_buffer); - } - - anv_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.objects); - anv_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.bos); -} - -void -anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) -{ - /* Delete all but the first batch bo */ - assert(!list_empty(&cmd_buffer->batch_bos)); - while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) { - struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer); - list_del(&bbo->link); - anv_batch_bo_destroy(bbo, cmd_buffer); - } - assert(!list_empty(&cmd_buffer->batch_bos)); - - anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer), - &cmd_buffer->batch, - GEN8_MI_BATCH_BUFFER_START_length * 4); - - while (anv_vector_length(&cmd_buffer->bt_blocks) > 1) { - int32_t *bt_block = anv_vector_remove(&cmd_buffer->bt_blocks); - anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool, - *bt_block); - } - assert(anv_vector_length(&cmd_buffer->bt_blocks) == 1); - cmd_buffer->bt_next = 0; - - cmd_buffer->surface_relocs.num_relocs = 0; - - /* Reset the list of seen buffers */ - cmd_buffer->seen_bbos.head = 0; - cmd_buffer->seen_bbos.tail = 0; - - *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = - anv_cmd_buffer_current_batch_bo(cmd_buffer); -} - -void -anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer); - - if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { - /* When we start a batch buffer, we subtract a certain amount of - * padding from the end to ensure that we always have room to emit a - * BATCH_BUFFER_START to chain to the next BO. We need to remove - * that padding before we end the batch; otherwise, we may end up - * with our BATCH_BUFFER_END in another BO. - */ - cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4; - assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size); - - anv_batch_emit(&cmd_buffer->batch, GEN7_MI_BATCH_BUFFER_END); - - /* Round batch up to an even number of dwords. */ - if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4) - anv_batch_emit(&cmd_buffer->batch, GEN7_MI_NOOP); - - cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY; - } - - anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); - - if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { - /* If this is a secondary command buffer, we need to determine the - * mode in which it will be executed with vkExecuteCommands. 
We - * determine this statically here so that this stays in sync with the - * actual ExecuteCommands implementation. - */ - if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) && - (batch_bo->length < ANV_CMD_BUFFER_BATCH_SIZE / 2)) { - /* If the secondary has exactly one batch buffer in its list *and* - * that batch buffer is less than half of the maximum size, we're - * probably better of simply copying it into our batch. - */ - cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT; - } else if (!(cmd_buffer->usage_flags & - VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) { - cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN; - - /* When we chain, we need to add an MI_BATCH_BUFFER_START command - * with its relocation. In order to handle this we'll increment here - * so we can unconditionally decrement right before adding the - * MI_BATCH_BUFFER_START command. - */ - batch_bo->relocs.num_relocs++; - cmd_buffer->batch.next += GEN8_MI_BATCH_BUFFER_START_length * 4; - } else { - cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN; - } - } -} - -static inline VkResult -anv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer *cmd_buffer, - struct list_head *list) -{ - list_for_each_entry(struct anv_batch_bo, bbo, list, link) { - struct anv_batch_bo **bbo_ptr = anv_vector_add(&cmd_buffer->seen_bbos); - if (bbo_ptr == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - *bbo_ptr = bbo; - } - - return VK_SUCCESS; -} - -void -anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, - struct anv_cmd_buffer *secondary) -{ - switch (secondary->exec_mode) { - case ANV_CMD_BUFFER_EXEC_MODE_EMIT: - anv_batch_emit_batch(&primary->batch, &secondary->batch); - anv_cmd_buffer_emit_state_base_address(primary); - break; - case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: { - struct anv_batch_bo *first_bbo = - list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link); - struct anv_batch_bo *last_bbo = - list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link); - - emit_batch_buffer_start(primary, &first_bbo->bo, 0); - - struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary); - assert(primary->batch.start == this_bbo->bo.map); - uint32_t offset = primary->batch.next - primary->batch.start; - const uint32_t inst_size = GEN8_MI_BATCH_BUFFER_START_length * 4; - - /* Roll back the previous MI_BATCH_BUFFER_START and its relocation so we - * can emit a new command and relocation for the current splice. In - * order to handle the initial-use case, we incremented next and - * num_relocs in end_batch_buffer() so we can alyways just subtract - * here. - */ - last_bbo->relocs.num_relocs--; - secondary->batch.next -= inst_size; - emit_batch_buffer_start(secondary, &this_bbo->bo, offset); - anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos); - - /* After patching up the secondary buffer, we need to clflush the - * modified instruction in case we're on a !llc platform. We use a - * little loop to handle the case where the instruction crosses a cache - * line boundary. 
- */ - if (!primary->device->info.has_llc) { - void *inst = secondary->batch.next - inst_size; - void *p = (void *) (((uintptr_t) inst) & ~CACHELINE_MASK); - __builtin_ia32_mfence(); - while (p < secondary->batch.next) { - __builtin_ia32_clflush(p); - p += CACHELINE_SIZE; - } - } - - anv_cmd_buffer_emit_state_base_address(primary); - break; - } - case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: { - struct list_head copy_list; - VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos, - secondary, - ©_list); - if (result != VK_SUCCESS) - return; /* FIXME */ - - anv_cmd_buffer_add_seen_bbos(primary, ©_list); - - struct anv_batch_bo *first_bbo = - list_first_entry(©_list, struct anv_batch_bo, link); - struct anv_batch_bo *last_bbo = - list_last_entry(©_list, struct anv_batch_bo, link); - - cmd_buffer_chain_to_batch_bo(primary, first_bbo); - - list_splicetail(©_list, &primary->batch_bos); - - anv_batch_bo_continue(last_bbo, &primary->batch, - GEN8_MI_BATCH_BUFFER_START_length * 4); - - anv_cmd_buffer_emit_state_base_address(primary); - break; - } - default: - assert(!"Invalid execution mode"); - } - - anv_reloc_list_append(&primary->surface_relocs, &primary->pool->alloc, - &secondary->surface_relocs, 0); -} - -static VkResult -anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *bo, - struct anv_reloc_list *relocs) -{ - struct drm_i915_gem_exec_object2 *obj = NULL; - - if (bo->index < cmd_buffer->execbuf2.bo_count && - cmd_buffer->execbuf2.bos[bo->index] == bo) - obj = &cmd_buffer->execbuf2.objects[bo->index]; - - if (obj == NULL) { - /* We've never seen this one before. Add it to the list and assign - * an id that we can use later. - */ - if (cmd_buffer->execbuf2.bo_count >= cmd_buffer->execbuf2.array_length) { - uint32_t new_len = cmd_buffer->execbuf2.objects ? - cmd_buffer->execbuf2.array_length * 2 : 64; - - struct drm_i915_gem_exec_object2 *new_objects = - anv_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_objects), - 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (new_objects == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct anv_bo **new_bos = - anv_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_bos), - 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (new_objects == NULL) { - anv_free(&cmd_buffer->pool->alloc, new_objects); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - if (cmd_buffer->execbuf2.objects) { - memcpy(new_objects, cmd_buffer->execbuf2.objects, - cmd_buffer->execbuf2.bo_count * sizeof(*new_objects)); - memcpy(new_bos, cmd_buffer->execbuf2.bos, - cmd_buffer->execbuf2.bo_count * sizeof(*new_bos)); - } - - cmd_buffer->execbuf2.objects = new_objects; - cmd_buffer->execbuf2.bos = new_bos; - cmd_buffer->execbuf2.array_length = new_len; - } - - assert(cmd_buffer->execbuf2.bo_count < cmd_buffer->execbuf2.array_length); - - bo->index = cmd_buffer->execbuf2.bo_count++; - obj = &cmd_buffer->execbuf2.objects[bo->index]; - cmd_buffer->execbuf2.bos[bo->index] = bo; - - obj->handle = bo->gem_handle; - obj->relocation_count = 0; - obj->relocs_ptr = 0; - obj->alignment = 0; - obj->offset = bo->offset; - obj->flags = bo->is_winsys_bo ? EXEC_OBJECT_WRITE : 0; - obj->rsvd1 = 0; - obj->rsvd2 = 0; - } - - if (relocs != NULL && obj->relocation_count == 0) { - /* This is the first time we've ever seen a list of relocations for - * this BO. Go ahead and set the relocations and then walk the list - * of relocations and add them all. 
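The "bo->index < bo_count && bos[bo->index] == bo" test above only trusts a cached index when the table still points back at the bo, so stale indices left over from earlier submissions are harmless. A tiny sketch of that dedup trick with hypothetical names:

#include <assert.h>
#include <stdint.h>

struct fake_bo { uint32_t index; };

#define MAX_BOS 8
static struct fake_bo *table[MAX_BOS];
static uint32_t bo_count;

static uint32_t add_bo(struct fake_bo *bo)
{
   if (bo->index < bo_count && table[bo->index] == bo)
      return bo->index;              /* already on this submission's list */
   bo->index = bo_count;
   table[bo_count++] = bo;
   return bo->index;
}

int main(void)
{
   struct fake_bo a = { .index = 7 };  /* stale index from some earlier run */
   assert(add_bo(&a) == 0);            /* stale value is detected and ignored */
   assert(add_bo(&a) == 0);            /* second add is a no-op */
   assert(bo_count == 1);
   return 0;
}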
- */ - obj->relocation_count = relocs->num_relocs; - obj->relocs_ptr = (uintptr_t) relocs->relocs; - - for (size_t i = 0; i < relocs->num_relocs; i++) { - /* A quick sanity check on relocations */ - assert(relocs->relocs[i].offset < bo->size); - anv_cmd_buffer_add_bo(cmd_buffer, relocs->reloc_bos[i], NULL); - } - } - - return VK_SUCCESS; -} - -static void -anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, - struct anv_reloc_list *list) -{ - struct anv_bo *bo; - - /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in - * struct drm_i915_gem_exec_object2 against the bos current offset and if - * all bos haven't moved it will skip relocation processing alltogether. - * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming - * value of offset so we can set it either way. For that to work we need - * to make sure all relocs use the same presumed offset. - */ - - for (size_t i = 0; i < list->num_relocs; i++) { - bo = list->reloc_bos[i]; - if (bo->offset != list->relocs[i].presumed_offset) - cmd_buffer->execbuf2.need_reloc = true; - - list->relocs[i].target_handle = bo->index; - } -} - -static uint64_t -read_reloc(const struct anv_device *device, const void *p) -{ - if (device->info.gen >= 8) - return *(uint64_t *)p; - else - return *(uint32_t *)p; -} - -static void -write_reloc(const struct anv_device *device, void *p, uint64_t v) -{ - if (device->info.gen >= 8) - *(uint64_t *)p = v; - else - *(uint32_t *)p = v; -} - -static void -adjust_relocations_from_block_pool(struct anv_block_pool *pool, - struct anv_reloc_list *relocs) -{ - for (size_t i = 0; i < relocs->num_relocs; i++) { - /* In general, we don't know how stale the relocated value is. It - * may have been used last time or it may not. Since we don't want - * to stomp it while the GPU may be accessing it, we haven't updated - * it anywhere else in the code. Instead, we just set the presumed - * offset to what it is now based on the delta and the data in the - * block pool. Then the kernel will update it for us if needed. - */ - assert(relocs->relocs[i].offset < pool->state.end); - const void *p = pool->map + relocs->relocs[i].offset; - - /* We're reading back the relocated value from potentially incoherent - * memory here. However, any change to the value will be from the kernel - * writing out relocations, which will keep the CPU cache up to date. - */ - relocs->relocs[i].presumed_offset = - read_reloc(pool->device, p) - relocs->relocs[i].delta; - - /* All of the relocations from this block pool to other BO's should - * have been emitted relative to the surface block pool center. We - * need to add the center offset to make them relative to the - * beginning of the actual GEM bo. - */ - relocs->relocs[i].offset += pool->center_bo_offset; - } -} - -static void -adjust_relocations_to_block_pool(struct anv_block_pool *pool, - struct anv_bo *from_bo, - struct anv_reloc_list *relocs, - uint32_t *last_pool_center_bo_offset) -{ - assert(*last_pool_center_bo_offset <= pool->center_bo_offset); - uint32_t delta = pool->center_bo_offset - *last_pool_center_bo_offset; - - /* When we initially emit relocations into a block pool, we don't - * actually know what the final center_bo_offset will be so we just emit - * it as if center_bo_offset == 0. Now that we know what the center - * offset is, we need to walk the list of relocations and adjust any - * relocations that point to the pool bo with the correct offset. 
- */ - for (size_t i = 0; i < relocs->num_relocs; i++) { - if (relocs->reloc_bos[i] == &pool->bo) { - /* Adjust the delta value in the relocation to correctly - * correspond to the new delta. Initially, this value may have - * been negative (if treated as unsigned), but we trust in - * uint32_t roll-over to fix that for us at this point. - */ - relocs->relocs[i].delta += delta; - - /* Since the delta has changed, we need to update the actual - * relocated value with the new presumed value. This function - * should only be called on batch buffers, so we know it isn't in - * use by the GPU at the moment. - */ - assert(relocs->relocs[i].offset < from_bo->size); - write_reloc(pool->device, from_bo->map + relocs->relocs[i].offset, - relocs->relocs[i].presumed_offset + - relocs->relocs[i].delta); - } - } - - *last_pool_center_bo_offset = pool->center_bo_offset; -} - -void -anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_batch *batch = &cmd_buffer->batch; - struct anv_block_pool *ss_pool = - &cmd_buffer->device->surface_state_block_pool; - - cmd_buffer->execbuf2.bo_count = 0; - cmd_buffer->execbuf2.need_reloc = false; - - adjust_relocations_from_block_pool(ss_pool, &cmd_buffer->surface_relocs); - anv_cmd_buffer_add_bo(cmd_buffer, &ss_pool->bo, &cmd_buffer->surface_relocs); - - /* First, we walk over all of the bos we've seen and add them and their - * relocations to the validate list. - */ - struct anv_batch_bo **bbo; - anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) { - adjust_relocations_to_block_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs, - &(*bbo)->last_ss_pool_bo_offset); - - anv_cmd_buffer_add_bo(cmd_buffer, &(*bbo)->bo, &(*bbo)->relocs); - } - - struct anv_batch_bo *first_batch_bo = - list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link); - - /* The kernel requires that the last entry in the validation list be the - * batch buffer to execute. We can simply swap the element - * corresponding to the first batch_bo in the chain with the last - * element in the list. - */ - if (first_batch_bo->bo.index != cmd_buffer->execbuf2.bo_count - 1) { - uint32_t idx = first_batch_bo->bo.index; - uint32_t last_idx = cmd_buffer->execbuf2.bo_count - 1; - - struct drm_i915_gem_exec_object2 tmp_obj = - cmd_buffer->execbuf2.objects[idx]; - assert(cmd_buffer->execbuf2.bos[idx] == &first_batch_bo->bo); - - cmd_buffer->execbuf2.objects[idx] = cmd_buffer->execbuf2.objects[last_idx]; - cmd_buffer->execbuf2.bos[idx] = cmd_buffer->execbuf2.bos[last_idx]; - cmd_buffer->execbuf2.bos[idx]->index = idx; - - cmd_buffer->execbuf2.objects[last_idx] = tmp_obj; - cmd_buffer->execbuf2.bos[last_idx] = &first_batch_bo->bo; - first_batch_bo->bo.index = last_idx; - } - - /* Now we go through and fixup all of the relocation lists to point to - * the correct indices in the object array. We have to do this after we - * reorder the list above as some of the indices may have changed. 
- */ - anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) - anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs); - - anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs); - - if (!cmd_buffer->device->info.has_llc) { - __builtin_ia32_mfence(); - anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) { - for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE) - __builtin_ia32_clflush((*bbo)->bo.map + i); - } - } - - cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) { - .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects, - .buffer_count = cmd_buffer->execbuf2.bo_count, - .batch_start_offset = 0, - .batch_len = batch->next - batch->start, - .cliprects_ptr = 0, - .num_cliprects = 0, - .DR1 = 0, - .DR4 = 0, - .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER | - I915_EXEC_CONSTANTS_REL_GENERAL, - .rsvd1 = cmd_buffer->device->context_id, - .rsvd2 = 0, - }; - - if (!cmd_buffer->execbuf2.need_reloc) - cmd_buffer->execbuf2.execbuf.flags |= I915_EXEC_NO_RELOC; -} diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c deleted file mode 100644 index b060828..0000000 --- a/src/vulkan/anv_cmd_buffer.c +++ /dev/null @@ -1,1191 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -/** \file anv_cmd_buffer.c - * - * This file contains all of the stuff for emitting commands into a command - * buffer. This includes implementations of most of the vkCmd* - * entrypoints. This file is concerned entirely with state emission and - * not with the command buffer data structure itself. As far as this file - * is concerned, most of anv_cmd_buffer is magic. - */ - -/* TODO: These are taken from GLES. 
We should check the Vulkan spec */ -const struct anv_dynamic_state default_dynamic_state = { - .viewport = { - .count = 0, - }, - .scissor = { - .count = 0, - }, - .line_width = 1.0f, - .depth_bias = { - .bias = 0.0f, - .clamp = 0.0f, - .slope = 0.0f, - }, - .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f }, - .depth_bounds = { - .min = 0.0f, - .max = 1.0f, - }, - .stencil_compare_mask = { - .front = ~0u, - .back = ~0u, - }, - .stencil_write_mask = { - .front = ~0u, - .back = ~0u, - }, - .stencil_reference = { - .front = 0u, - .back = 0u, - }, -}; - -void -anv_dynamic_state_copy(struct anv_dynamic_state *dest, - const struct anv_dynamic_state *src, - uint32_t copy_mask) -{ - if (copy_mask & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { - dest->viewport.count = src->viewport.count; - typed_memcpy(dest->viewport.viewports, src->viewport.viewports, - src->viewport.count); - } - - if (copy_mask & (1 << VK_DYNAMIC_STATE_SCISSOR)) { - dest->scissor.count = src->scissor.count; - typed_memcpy(dest->scissor.scissors, src->scissor.scissors, - src->scissor.count); - } - - if (copy_mask & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) - dest->line_width = src->line_width; - - if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) - dest->depth_bias = src->depth_bias; - - if (copy_mask & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) - typed_memcpy(dest->blend_constants, src->blend_constants, 4); - - if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) - dest->depth_bounds = src->depth_bounds; - - if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) - dest->stencil_compare_mask = src->stencil_compare_mask; - - if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) - dest->stencil_write_mask = src->stencil_write_mask; - - if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) - dest->stencil_reference = src->stencil_reference; -} - -static void -anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_cmd_state *state = &cmd_buffer->state; - - memset(&state->descriptors, 0, sizeof(state->descriptors)); - memset(&state->push_constants, 0, sizeof(state->push_constants)); - memset(state->binding_tables, 0, sizeof(state->binding_tables)); - memset(state->samplers, 0, sizeof(state->samplers)); - - /* 0 isn't a valid config. This ensures that we always configure L3$. */ - cmd_buffer->state.current_l3_config = 0; - - state->dirty = ~0; - state->vb_dirty = 0; - state->descriptors_dirty = 0; - state->push_constants_dirty = 0; - state->pipeline = NULL; - state->restart_index = UINT32_MAX; - state->dynamic = default_dynamic_state; - state->need_query_wa = true; - - if (state->attachments != NULL) { - anv_free(&cmd_buffer->pool->alloc, state->attachments); - state->attachments = NULL; - } - - state->gen7.index_buffer = NULL; -} - -/** - * Setup anv_cmd_state::attachments for vkCmdBeginRenderPass. 
- */ -void -anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer, - const VkRenderPassBeginInfo *info) -{ - struct anv_cmd_state *state = &cmd_buffer->state; - ANV_FROM_HANDLE(anv_render_pass, pass, info->renderPass); - - anv_free(&cmd_buffer->pool->alloc, state->attachments); - - if (pass->attachment_count == 0) { - state->attachments = NULL; - return; - } - - state->attachments = anv_alloc(&cmd_buffer->pool->alloc, - pass->attachment_count * - sizeof(state->attachments[0]), - 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (state->attachments == NULL) { - /* FIXME: Propagate VK_ERROR_OUT_OF_HOST_MEMORY to vkEndCommandBuffer */ - abort(); - } - - for (uint32_t i = 0; i < pass->attachment_count; ++i) { - struct anv_render_pass_attachment *att = &pass->attachments[i]; - VkImageAspectFlags clear_aspects = 0; - - if (anv_format_is_color(att->format)) { - /* color attachment */ - if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT; - } - } else { - /* depthstencil attachment */ - if (att->format->has_depth && - att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; - } - if (att->format->has_stencil && - att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; - } - } - - state->attachments[i].pending_clear_aspects = clear_aspects; - if (clear_aspects) { - assert(info->clearValueCount > i); - state->attachments[i].clear_value = info->pClearValues[i]; - } - } -} - -static VkResult -anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage, uint32_t size) -{ - struct anv_push_constants **ptr = &cmd_buffer->state.push_constants[stage]; - - if (*ptr == NULL) { - *ptr = anv_alloc(&cmd_buffer->pool->alloc, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (*ptr == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } else if ((*ptr)->size < size) { - *ptr = anv_realloc(&cmd_buffer->pool->alloc, *ptr, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (*ptr == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - (*ptr)->size = size; - - return VK_SUCCESS; -} - -#define anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, field) \ - anv_cmd_buffer_ensure_push_constants_size(cmd_buffer, stage, \ - (offsetof(struct anv_push_constants, field) + \ - sizeof(cmd_buffer->state.push_constants[0]->field))) - -static VkResult anv_create_cmd_buffer( - struct anv_device * device, - struct anv_cmd_pool * pool, - VkCommandBufferLevel level, - VkCommandBuffer* pCommandBuffer) -{ - struct anv_cmd_buffer *cmd_buffer; - VkResult result; - - cmd_buffer = anv_alloc(&pool->alloc, sizeof(*cmd_buffer), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (cmd_buffer == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC; - cmd_buffer->device = device; - cmd_buffer->pool = pool; - cmd_buffer->level = level; - cmd_buffer->state.attachments = NULL; - - result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); - if (result != VK_SUCCESS) - goto fail; - - anv_state_stream_init(&cmd_buffer->surface_state_stream, - &device->surface_state_block_pool); - anv_state_stream_init(&cmd_buffer->dynamic_state_stream, - &device->dynamic_state_block_pool); - - if (pool) { - list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); - } else { - /* Init the pool_link so we can safefly call list_del when we destroy - * the command buffer - */ - list_inithead(&cmd_buffer->pool_link); - } - - 
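/* A minimal sketch, not taken from the original source, of how the
 * anv_cmd_buffer_ensure_push_constant_field() macro defined above is meant to
 * be used. Assuming the `dynamic` field that the descriptor-set binding path
 * later requests, the macro expands to a size covering everything up to and
 * including that field:
 *
 *    anv_cmd_buffer_ensure_push_constants_size(cmd_buffer, stage,
 *        offsetof(struct anv_push_constants, dynamic) +
 *        sizeof(cmd_buffer->state.push_constants[0]->dynamic));
 *
 * so each stage's push-constant allocation is only (re)allocated large enough
 * for the fields that have actually been touched.
 */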
*pCommandBuffer = anv_cmd_buffer_to_handle(cmd_buffer); - - return VK_SUCCESS; - - fail: - anv_free(&cmd_buffer->pool->alloc, cmd_buffer); - - return result; -} - -VkResult anv_AllocateCommandBuffers( - VkDevice _device, - const VkCommandBufferAllocateInfo* pAllocateInfo, - VkCommandBuffer* pCommandBuffers) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_cmd_pool, pool, pAllocateInfo->commandPool); - - VkResult result = VK_SUCCESS; - uint32_t i; - - for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { - result = anv_create_cmd_buffer(device, pool, pAllocateInfo->level, - &pCommandBuffers[i]); - if (result != VK_SUCCESS) - break; - } - - if (result != VK_SUCCESS) - anv_FreeCommandBuffers(_device, pAllocateInfo->commandPool, - i, pCommandBuffers); - - return result; -} - -static void -anv_cmd_buffer_destroy(struct anv_cmd_buffer *cmd_buffer) -{ - list_del(&cmd_buffer->pool_link); - - anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer); - - anv_state_stream_finish(&cmd_buffer->surface_state_stream); - anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); - - anv_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); - anv_free(&cmd_buffer->pool->alloc, cmd_buffer); -} - -void anv_FreeCommandBuffers( - VkDevice device, - VkCommandPool commandPool, - uint32_t commandBufferCount, - const VkCommandBuffer* pCommandBuffers) -{ - for (uint32_t i = 0; i < commandBufferCount; i++) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCommandBuffers[i]); - - anv_cmd_buffer_destroy(cmd_buffer); - } -} - -VkResult anv_ResetCommandBuffer( - VkCommandBuffer commandBuffer, - VkCommandBufferResetFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - cmd_buffer->usage_flags = 0; - cmd_buffer->state.current_pipeline = UINT32_MAX; - anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); - anv_cmd_state_reset(cmd_buffer); - - return VK_SUCCESS; -} - -void -anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) -{ - switch (cmd_buffer->device->info.gen) { - case 7: - if (cmd_buffer->device->info.is_haswell) - return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); - else - return gen7_cmd_buffer_emit_state_base_address(cmd_buffer); - case 8: - return gen8_cmd_buffer_emit_state_base_address(cmd_buffer); - case 9: - return gen9_cmd_buffer_emit_state_base_address(cmd_buffer); - default: - unreachable("unsupported gen\n"); - } -} - -VkResult anv_BeginCommandBuffer( - VkCommandBuffer commandBuffer, - const VkCommandBufferBeginInfo* pBeginInfo) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - /* If this is the first vkBeginCommandBuffer, we must *initialize* the - * command buffer's state. Otherwise, we must *reset* its state. In both - * cases we reset it. - * - * From the Vulkan 1.0 spec: - * - * If a command buffer is in the executable state and the command buffer - * was allocated from a command pool with the - * VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT flag set, then - * vkBeginCommandBuffer implicitly resets the command buffer, behaving - * as if vkResetCommandBuffer had been called with - * VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT not set. It then puts - * the command buffer in the recording state. 
- */ - anv_ResetCommandBuffer(commandBuffer, /*flags*/ 0); - - cmd_buffer->usage_flags = pBeginInfo->flags; - - assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY || - !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)); - - anv_cmd_buffer_emit_state_base_address(cmd_buffer); - - if (cmd_buffer->usage_flags & - VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) { - cmd_buffer->state.framebuffer = - anv_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer); - cmd_buffer->state.pass = - anv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass); - - struct anv_subpass *subpass = - &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass]; - - anv_cmd_buffer_set_subpass(cmd_buffer, subpass); - } - - return VK_SUCCESS; -} - -VkResult anv_EndCommandBuffer( - VkCommandBuffer commandBuffer) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_device *device = cmd_buffer->device; - - anv_cmd_buffer_end_batch_buffer(cmd_buffer); - - if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { - /* The algorithm used to compute the validate list is not threadsafe as - * it uses the bo->index field. We have to lock the device around it. - * Fortunately, the chances for contention here are probably very low. - */ - pthread_mutex_lock(&device->mutex); - anv_cmd_buffer_prepare_execbuf(cmd_buffer); - pthread_mutex_unlock(&device->mutex); - } - - return VK_SUCCESS; -} - -void anv_CmdBindPipeline( - VkCommandBuffer commandBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipeline _pipeline) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); - - switch (pipelineBindPoint) { - case VK_PIPELINE_BIND_POINT_COMPUTE: - cmd_buffer->state.compute_pipeline = pipeline; - cmd_buffer->state.compute_dirty |= ANV_CMD_DIRTY_PIPELINE; - cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; - cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_COMPUTE_BIT; - break; - - case VK_PIPELINE_BIND_POINT_GRAPHICS: - cmd_buffer->state.pipeline = pipeline; - cmd_buffer->state.vb_dirty |= pipeline->vb_used; - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE; - cmd_buffer->state.push_constants_dirty |= pipeline->active_stages; - cmd_buffer->state.descriptors_dirty |= pipeline->active_stages; - - /* Apply the dynamic state from the pipeline */ - cmd_buffer->state.dirty |= pipeline->dynamic_state_mask; - anv_dynamic_state_copy(&cmd_buffer->state.dynamic, - &pipeline->dynamic_state, - pipeline->dynamic_state_mask); - break; - - default: - assert(!"invalid bind point"); - break; - } -} - -void anv_CmdSetViewport( - VkCommandBuffer commandBuffer, - uint32_t firstViewport, - uint32_t viewportCount, - const VkViewport* pViewports) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - const uint32_t total_count = firstViewport + viewportCount; - if (cmd_buffer->state.dynamic.viewport.count < total_count) - cmd_buffer->state.dynamic.viewport.count = total_count; - - memcpy(cmd_buffer->state.dynamic.viewport.viewports + firstViewport, - pViewports, viewportCount * sizeof(*pViewports)); - - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_VIEWPORT; -} - -void anv_CmdSetScissor( - VkCommandBuffer commandBuffer, - uint32_t firstScissor, - uint32_t scissorCount, - const VkRect2D* pScissors) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - const uint32_t total_count = firstScissor + scissorCount; - if 
(cmd_buffer->state.dynamic.scissor.count < total_count) - cmd_buffer->state.dynamic.scissor.count = total_count; - - memcpy(cmd_buffer->state.dynamic.scissor.scissors + firstScissor, - pScissors, scissorCount * sizeof(*pScissors)); - - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_SCISSOR; -} - -void anv_CmdSetLineWidth( - VkCommandBuffer commandBuffer, - float lineWidth) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - cmd_buffer->state.dynamic.line_width = lineWidth; - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH; -} - -void anv_CmdSetDepthBias( - VkCommandBuffer commandBuffer, - float depthBiasConstantFactor, - float depthBiasClamp, - float depthBiasSlopeFactor) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - cmd_buffer->state.dynamic.depth_bias.bias = depthBiasConstantFactor; - cmd_buffer->state.dynamic.depth_bias.clamp = depthBiasClamp; - cmd_buffer->state.dynamic.depth_bias.slope = depthBiasSlopeFactor; - - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; -} - -void anv_CmdSetBlendConstants( - VkCommandBuffer commandBuffer, - const float blendConstants[4]) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - memcpy(cmd_buffer->state.dynamic.blend_constants, - blendConstants, sizeof(float) * 4); - - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; -} - -void anv_CmdSetDepthBounds( - VkCommandBuffer commandBuffer, - float minDepthBounds, - float maxDepthBounds) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - cmd_buffer->state.dynamic.depth_bounds.min = minDepthBounds; - cmd_buffer->state.dynamic.depth_bounds.max = maxDepthBounds; - - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS; -} - -void anv_CmdSetStencilCompareMask( - VkCommandBuffer commandBuffer, - VkStencilFaceFlags faceMask, - uint32_t compareMask) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd_buffer->state.dynamic.stencil_compare_mask.front = compareMask; - if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmd_buffer->state.dynamic.stencil_compare_mask.back = compareMask; - - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; -} - -void anv_CmdSetStencilWriteMask( - VkCommandBuffer commandBuffer, - VkStencilFaceFlags faceMask, - uint32_t writeMask) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd_buffer->state.dynamic.stencil_write_mask.front = writeMask; - if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmd_buffer->state.dynamic.stencil_write_mask.back = writeMask; - - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; -} - -void anv_CmdSetStencilReference( - VkCommandBuffer commandBuffer, - VkStencilFaceFlags faceMask, - uint32_t reference) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd_buffer->state.dynamic.stencil_reference.front = reference; - if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmd_buffer->state.dynamic.stencil_reference.back = reference; - - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; -} - -void anv_CmdBindDescriptorSets( - VkCommandBuffer commandBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipelineLayout _layout, - uint32_t firstSet, - uint32_t descriptorSetCount, - const VkDescriptorSet* pDescriptorSets, - uint32_t dynamicOffsetCount, - const uint32_t* pDynamicOffsets) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, 
cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); - struct anv_descriptor_set_layout *set_layout; - - assert(firstSet + descriptorSetCount < MAX_SETS); - - uint32_t dynamic_slot = 0; - for (uint32_t i = 0; i < descriptorSetCount; i++) { - ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); - set_layout = layout->set[firstSet + i].layout; - - if (cmd_buffer->state.descriptors[firstSet + i] != set) { - cmd_buffer->state.descriptors[firstSet + i] = set; - cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; - } - - if (set_layout->dynamic_offset_count > 0) { - anv_foreach_stage(s, set_layout->shader_stages) { - anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, s, dynamic); - - struct anv_push_constants *push = - cmd_buffer->state.push_constants[s]; - - unsigned d = layout->set[firstSet + i].dynamic_offset_start; - const uint32_t *offsets = pDynamicOffsets + dynamic_slot; - struct anv_descriptor *desc = set->descriptors; - - for (unsigned b = 0; b < set_layout->binding_count; b++) { - if (set_layout->binding[b].dynamic_offset_index < 0) - continue; - - unsigned array_size = set_layout->binding[b].array_size; - for (unsigned j = 0; j < array_size; j++) { - uint32_t range = 0; - if (desc->buffer_view) - range = desc->buffer_view->range; - push->dynamic[d].offset = *(offsets++); - push->dynamic[d].range = range; - desc++; - d++; - } - } - } - cmd_buffer->state.push_constants_dirty |= set_layout->shader_stages; - } - } -} - -void anv_CmdBindVertexBuffers( - VkCommandBuffer commandBuffer, - uint32_t firstBinding, - uint32_t bindingCount, - const VkBuffer* pBuffers, - const VkDeviceSize* pOffsets) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings; - - /* We have to defer setting up vertex buffer since we need the buffer - * stride from the pipeline. */ - - assert(firstBinding + bindingCount < MAX_VBS); - for (uint32_t i = 0; i < bindingCount; i++) { - vb[firstBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]); - vb[firstBinding + i].offset = pOffsets[i]; - cmd_buffer->state.vb_dirty |= 1 << (firstBinding + i); - } -} - -static void -add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer, - struct anv_state state, struct anv_bo *bo, uint32_t offset) -{ - /* The address goes in SURFACE_STATE dword 1 for gens < 8 and dwords 8 and - * 9 for gen8+. We only write the first dword for gen8+ here and rely on - * the initial state to set the high bits to 0. */ - - const uint32_t dword = cmd_buffer->device->info.gen < 8 ? 
1 : 8; - - anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc, - state.offset + dword * 4, bo, offset); -} - -const struct anv_format * -anv_format_for_descriptor_type(VkDescriptorType type) -{ - switch (type) { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - return anv_format_for_vk_format(VK_FORMAT_R32G32B32A32_SFLOAT); - - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - return anv_format_for_vk_format(VK_FORMAT_UNDEFINED); - - default: - unreachable("Invalid descriptor type"); - } -} - -VkResult -anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage, - struct anv_state *bt_state) -{ - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - struct anv_subpass *subpass = cmd_buffer->state.subpass; - struct anv_pipeline_bind_map *map; - uint32_t color_count, bias, state_offset; - - switch (stage) { - case MESA_SHADER_FRAGMENT: - map = &cmd_buffer->state.pipeline->bindings[stage]; - bias = MAX_RTS; - color_count = subpass->color_count; - break; - case MESA_SHADER_COMPUTE: - map = &cmd_buffer->state.compute_pipeline->bindings[stage]; - bias = 1; - color_count = 0; - break; - default: - map = &cmd_buffer->state.pipeline->bindings[stage]; - bias = 0; - color_count = 0; - break; - } - - if (color_count + map->surface_count == 0) { - *bt_state = (struct anv_state) { 0, }; - return VK_SUCCESS; - } - - *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, - bias + map->surface_count, - &state_offset); - uint32_t *bt_map = bt_state->map; - - if (bt_state->map == NULL) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - - for (uint32_t a = 0; a < color_count; a++) { - const struct anv_image_view *iview = - fb->attachments[subpass->color_attachments[a]]; - - assert(iview->color_rt_surface_state.alloc_size); - bt_map[a] = iview->color_rt_surface_state.offset + state_offset; - add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state, - iview->bo, iview->offset); - } - - if (stage == MESA_SHADER_COMPUTE && - cmd_buffer->state.compute_pipeline->cs_prog_data.uses_num_work_groups) { - struct anv_bo *bo = cmd_buffer->state.num_workgroups_bo; - uint32_t bo_offset = cmd_buffer->state.num_workgroups_offset; - - struct anv_state surface_state; - surface_state = - anv_cmd_buffer_alloc_surface_state(cmd_buffer); - - const struct anv_format *format = - anv_format_for_descriptor_type(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); - anv_fill_buffer_surface_state(cmd_buffer->device, surface_state, - format->isl_format, bo_offset, 12, 1); - - bt_map[0] = surface_state.offset + state_offset; - add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset); - } - - if (map->surface_count == 0) - goto out; - - if (map->image_count > 0) { - VkResult result = - anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, images); - if (result != VK_SUCCESS) - return result; - - cmd_buffer->state.push_constants_dirty |= 1 << stage; - } - - uint32_t image = 0; - for (uint32_t s = 0; s < map->surface_count; s++) { - struct anv_pipeline_binding *binding = &map->surface_to_descriptor[s]; - struct anv_descriptor_set *set = - cmd_buffer->state.descriptors[binding->set]; - struct anv_descriptor *desc = &set->descriptors[binding->offset]; - - struct anv_state surface_state; - struct anv_bo *bo; - uint32_t bo_offset; - - switch (desc->type) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - /* Nothing for us to do here */ - continue; - - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - case 
VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - surface_state = desc->image_view->sampler_surface_state; - assert(surface_state.alloc_size); - bo = desc->image_view->bo; - bo_offset = desc->image_view->offset; - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: { - surface_state = desc->image_view->storage_surface_state; - assert(surface_state.alloc_size); - bo = desc->image_view->bo; - bo_offset = desc->image_view->offset; - - struct brw_image_param *image_param = - &cmd_buffer->state.push_constants[stage]->images[image++]; - - anv_image_view_fill_image_param(cmd_buffer->device, desc->image_view, - image_param); - image_param->surface_idx = bias + s; - break; - } - - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - surface_state = desc->buffer_view->surface_state; - assert(surface_state.alloc_size); - bo = desc->buffer_view->bo; - bo_offset = desc->buffer_view->offset; - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - surface_state = desc->buffer_view->storage_surface_state; - assert(surface_state.alloc_size); - bo = desc->buffer_view->bo; - bo_offset = desc->buffer_view->offset; - - struct brw_image_param *image_param = - &cmd_buffer->state.push_constants[stage]->images[image++]; - - anv_buffer_view_fill_image_param(cmd_buffer->device, desc->buffer_view, - image_param); - image_param->surface_idx = bias + s; - break; - - default: - assert(!"Invalid descriptor type"); - continue; - } - - bt_map[bias + s] = surface_state.offset + state_offset; - add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset); - } - assert(image == map->image_count); - - out: - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(*bt_state); - - return VK_SUCCESS; -} - -VkResult -anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage, struct anv_state *state) -{ - struct anv_pipeline_bind_map *map; - - if (stage == MESA_SHADER_COMPUTE) - map = &cmd_buffer->state.compute_pipeline->bindings[stage]; - else - map = &cmd_buffer->state.pipeline->bindings[stage]; - - if (map->sampler_count == 0) { - *state = (struct anv_state) { 0, }; - return VK_SUCCESS; - } - - uint32_t size = map->sampler_count * 16; - *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32); - - if (state->map == NULL) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - - for (uint32_t s = 0; s < map->sampler_count; s++) { - struct anv_pipeline_binding *binding = &map->sampler_to_descriptor[s]; - struct anv_descriptor_set *set = - cmd_buffer->state.descriptors[binding->set]; - struct anv_descriptor *desc = &set->descriptors[binding->offset]; - - if (desc->type != VK_DESCRIPTOR_TYPE_SAMPLER && - desc->type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) - continue; - - struct anv_sampler *sampler = desc->sampler; - - /* This can happen if we have an unfilled slot since TYPE_SAMPLER - * happens to be zero. 
- */ - if (sampler == NULL) - continue; - - memcpy(state->map + (s * 16), - sampler->state, sizeof(sampler->state)); - } - - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(*state); - - return VK_SUCCESS; -} - -struct anv_state -anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, - const void *data, uint32_t size, uint32_t alignment) -{ - struct anv_state state; - - state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment); - memcpy(state.map, data, size); - - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(state); - - VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size)); - - return state; -} - -struct anv_state -anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, - uint32_t *a, uint32_t *b, - uint32_t dwords, uint32_t alignment) -{ - struct anv_state state; - uint32_t *p; - - state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - dwords * 4, alignment); - p = state.map; - for (uint32_t i = 0; i < dwords; i++) - p[i] = a[i] | b[i]; - - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(state); - - VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4)); - - return state; -} - -/** - * @brief Setup the command buffer for recording commands inside the given - * subpass. - * - * This does not record all commands needed for starting the subpass. - * Starting the subpass may require additional commands. - * - * Note that vkCmdBeginRenderPass, vkCmdNextSubpass, and vkBeginCommandBuffer - * with VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT, all setup the - * command buffer for recording commands for some subpass. But only the first - * two, vkCmdBeginRenderPass and vkCmdNextSubpass, can start a subpass. - */ -void -anv_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass) -{ - switch (cmd_buffer->device->info.gen) { - case 7: - gen7_cmd_buffer_set_subpass(cmd_buffer, subpass); - break; - case 8: - gen8_cmd_buffer_set_subpass(cmd_buffer, subpass); - break; - case 9: - gen9_cmd_buffer_set_subpass(cmd_buffer, subpass); - break; - default: - unreachable("unsupported gen\n"); - } -} - -struct anv_state -anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage) -{ - struct anv_push_constants *data = - cmd_buffer->state.push_constants[stage]; - struct brw_stage_prog_data *prog_data = - cmd_buffer->state.pipeline->prog_data[stage]; - - /* If we don't actually have any push constants, bail. 
*/ - if (data == NULL || prog_data->nr_params == 0) - return (struct anv_state) { .offset = 0 }; - - struct anv_state state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - prog_data->nr_params * sizeof(float), - 32 /* bottom 5 bits MBZ */); - - /* Walk through the param array and fill the buffer with data */ - uint32_t *u32_map = state.map; - for (unsigned i = 0; i < prog_data->nr_params; i++) { - uint32_t offset = (uintptr_t)prog_data->param[i]; - u32_map[i] = *(uint32_t *)((uint8_t *)data + offset); - } - - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(state); - - return state; -} - -struct anv_state -anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_push_constants *data = - cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; - const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; - - const unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; - const unsigned push_constant_data_size = - (local_id_dwords + prog_data->nr_params) * 4; - const unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); - const unsigned param_aligned_count = - reg_aligned_constant_size / sizeof(uint32_t); - - /* If we don't actually have any push constants, bail. */ - if (reg_aligned_constant_size == 0) - return (struct anv_state) { .offset = 0 }; - - const unsigned threads = pipeline->cs_thread_width_max; - const unsigned total_push_constants_size = - reg_aligned_constant_size * threads; - const unsigned push_constant_alignment = - cmd_buffer->device->info.gen < 8 ? 32 : 64; - const unsigned aligned_total_push_constants_size = - ALIGN(total_push_constants_size, push_constant_alignment); - struct anv_state state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - aligned_total_push_constants_size, - push_constant_alignment); - - /* Walk through the param array and fill the buffer with data */ - uint32_t *u32_map = state.map; - - brw_cs_fill_local_id_payload(cs_prog_data, u32_map, threads, - reg_aligned_constant_size); - - /* Setup uniform data for the first thread */ - for (unsigned i = 0; i < prog_data->nr_params; i++) { - uint32_t offset = (uintptr_t)prog_data->param[i]; - u32_map[local_id_dwords + i] = *(uint32_t *)((uint8_t *)data + offset); - } - - /* Copy uniform data from the first thread to every other thread */ - const size_t uniform_data_size = prog_data->nr_params * sizeof(uint32_t); - for (unsigned t = 1; t < threads; t++) { - memcpy(&u32_map[t * param_aligned_count + local_id_dwords], - &u32_map[local_id_dwords], - uniform_data_size); - } - - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(state); - - return state; -} - -void anv_CmdPushConstants( - VkCommandBuffer commandBuffer, - VkPipelineLayout layout, - VkShaderStageFlags stageFlags, - uint32_t offset, - uint32_t size, - const void* pValues) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - anv_foreach_stage(stage, stageFlags) { - anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, client_data); - - memcpy(cmd_buffer->state.push_constants[stage]->client_data + offset, - pValues, size); - } - - cmd_buffer->state.push_constants_dirty |= stageFlags; -} - -void anv_CmdExecuteCommands( - VkCommandBuffer commandBuffer, - uint32_t commandBufferCount, - const VkCommandBuffer* pCmdBuffers) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, primary, commandBuffer); - - 
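/* A minimal client-side usage sketch, assuming standard Vulkan 1.0 API calls
 * only (the variable names are placeholders, not part of this patch): a
 * secondary command buffer is recorded with
 * VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT and then spliced into a
 * primary one, which is what the entry point below implements.
 *
 *    VkCommandBufferInheritanceInfo inherit = {
 *       .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO,
 *       .renderPass = render_pass,
 *       .subpass = 0,
 *       .framebuffer = framebuffer,
 *    };
 *    VkCommandBufferBeginInfo begin = {
 *       .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
 *       .flags = VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT,
 *       .pInheritanceInfo = &inherit,
 *    };
 *    vkBeginCommandBuffer(secondary, &begin);
 *    // ... record draws ...
 *    vkEndCommandBuffer(secondary);
 *
 *    vkCmdExecuteCommands(primary, 1, &secondary);
 *
 * Whether the secondary gets emitted inline, chained, or copied and chained
 * is chosen when its batch is ended (see the exec_mode selection earlier in
 * this patch) and then consumed by anv_cmd_buffer_add_secondary().
 */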
assert(primary->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); - - for (uint32_t i = 0; i < commandBufferCount; i++) { - ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); - - assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); - - anv_cmd_buffer_add_secondary(primary, secondary); - } -} - -VkResult anv_CreateCommandPool( - VkDevice _device, - const VkCommandPoolCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkCommandPool* pCmdPool) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_cmd_pool *pool; - - pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pool == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - if (pAllocator) - pool->alloc = *pAllocator; - else - pool->alloc = device->alloc; - - list_inithead(&pool->cmd_buffers); - - *pCmdPool = anv_cmd_pool_to_handle(pool); - - return VK_SUCCESS; -} - -void anv_DestroyCommandPool( - VkDevice _device, - VkCommandPool commandPool, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); - - anv_ResetCommandPool(_device, commandPool, 0); - - anv_free2(&device->alloc, pAllocator, pool); -} - -VkResult anv_ResetCommandPool( - VkDevice device, - VkCommandPool commandPool, - VkCommandPoolResetFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); - - /* FIXME: vkResetCommandPool must not destroy its command buffers. The - * Vulkan 1.0 spec requires that it only reset them: - * - * Resetting a command pool recycles all of the resources from all of - * the command buffers allocated from the command pool back to the - * command pool. All command buffers that have been allocated from the - * command pool are put in the initial state. - */ - list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer, - &pool->cmd_buffers, pool_link) { - anv_cmd_buffer_destroy(cmd_buffer); - } - - return VK_SUCCESS; -} - -/** - * Return NULL if the current subpass has no depthstencil attachment. - */ -const struct anv_image_view * -anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer) -{ - const struct anv_subpass *subpass = cmd_buffer->state.subpass; - const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - - if (subpass->depth_stencil_attachment == VK_ATTACHMENT_UNUSED) - return NULL; - - const struct anv_image_view *iview = - fb->attachments[subpass->depth_stencil_attachment]; - - assert(iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT)); - - return iview; -} diff --git a/src/vulkan/anv_descriptor_set.c b/src/vulkan/anv_descriptor_set.c deleted file mode 100644 index 7a77336..0000000 --- a/src/vulkan/anv_descriptor_set.c +++ /dev/null @@ -1,532 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -/* - * Descriptor set layouts. - */ - -VkResult anv_CreateDescriptorSetLayout( - VkDevice _device, - const VkDescriptorSetLayoutCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkDescriptorSetLayout* pSetLayout) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_descriptor_set_layout *set_layout; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); - - uint32_t max_binding = 0; - uint32_t immutable_sampler_count = 0; - for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { - max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding); - if (pCreateInfo->pBindings[j].pImmutableSamplers) - immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount; - } - - size_t size = sizeof(struct anv_descriptor_set_layout) + - (max_binding + 1) * sizeof(set_layout->binding[0]) + - immutable_sampler_count * sizeof(struct anv_sampler *); - - set_layout = anv_alloc2(&device->alloc, pAllocator, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!set_layout) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - /* We just allocate all the samplers at the end of the struct */ - struct anv_sampler **samplers = - (struct anv_sampler **)&set_layout->binding[max_binding + 1]; - - set_layout->binding_count = max_binding + 1; - set_layout->shader_stages = 0; - set_layout->size = 0; - - for (uint32_t b = 0; b <= max_binding; b++) { - /* Initialize all binding_layout entries to -1 */ - memset(&set_layout->binding[b], -1, sizeof(set_layout->binding[b])); - - set_layout->binding[b].immutable_samplers = NULL; - } - - /* Initialize all samplers to 0 */ - memset(samplers, 0, immutable_sampler_count * sizeof(*samplers)); - - uint32_t sampler_count[MESA_SHADER_STAGES] = { 0, }; - uint32_t surface_count[MESA_SHADER_STAGES] = { 0, }; - uint32_t image_count[MESA_SHADER_STAGES] = { 0, }; - uint32_t buffer_count = 0; - uint32_t dynamic_offset_count = 0; - - for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { - const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[j]; - uint32_t b = binding->binding; - - assert(binding->descriptorCount > 0); - set_layout->binding[b].array_size = binding->descriptorCount; - set_layout->binding[b].descriptor_index = set_layout->size; - set_layout->size += binding->descriptorCount; - - switch (binding->descriptorType) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - anv_foreach_stage(s, binding->stageFlags) { - set_layout->binding[b].stage[s].sampler_index = sampler_count[s]; - sampler_count[s] += binding->descriptorCount; - } - break; - default: - break; - } - - switch (binding->descriptorType) { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - set_layout->binding[b].buffer_index = buffer_count; - buffer_count += binding->descriptorCount; - /* 
fall through */ - - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - anv_foreach_stage(s, binding->stageFlags) { - set_layout->binding[b].stage[s].surface_index = surface_count[s]; - surface_count[s] += binding->descriptorCount; - } - break; - default: - break; - } - - switch (binding->descriptorType) { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - set_layout->binding[b].dynamic_offset_index = dynamic_offset_count; - dynamic_offset_count += binding->descriptorCount; - break; - default: - break; - } - - switch (binding->descriptorType) { - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - anv_foreach_stage(s, binding->stageFlags) { - set_layout->binding[b].stage[s].image_index = image_count[s]; - image_count[s] += binding->descriptorCount; - } - break; - default: - break; - } - - if (binding->pImmutableSamplers) { - set_layout->binding[b].immutable_samplers = samplers; - samplers += binding->descriptorCount; - - for (uint32_t i = 0; i < binding->descriptorCount; i++) - set_layout->binding[b].immutable_samplers[i] = - anv_sampler_from_handle(binding->pImmutableSamplers[i]); - } else { - set_layout->binding[b].immutable_samplers = NULL; - } - - set_layout->shader_stages |= binding->stageFlags; - } - - set_layout->buffer_count = buffer_count; - set_layout->dynamic_offset_count = dynamic_offset_count; - - *pSetLayout = anv_descriptor_set_layout_to_handle(set_layout); - - return VK_SUCCESS; -} - -void anv_DestroyDescriptorSetLayout( - VkDevice _device, - VkDescriptorSetLayout _set_layout, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, _set_layout); - - anv_free2(&device->alloc, pAllocator, set_layout); -} - -/* - * Pipeline layouts. These have nothing to do with the pipeline. 
They are - * just muttiple descriptor set layouts pasted together - */ - -VkResult anv_CreatePipelineLayout( - VkDevice _device, - const VkPipelineLayoutCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkPipelineLayout* pPipelineLayout) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_pipeline_layout *layout; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); - - layout = anv_alloc2(&device->alloc, pAllocator, sizeof(*layout), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (layout == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - layout->num_sets = pCreateInfo->setLayoutCount; - - unsigned dynamic_offset_count = 0; - - memset(layout->stage, 0, sizeof(layout->stage)); - for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) { - ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, - pCreateInfo->pSetLayouts[set]); - layout->set[set].layout = set_layout; - - layout->set[set].dynamic_offset_start = dynamic_offset_count; - for (uint32_t b = 0; b < set_layout->binding_count; b++) { - if (set_layout->binding[b].dynamic_offset_index >= 0) - dynamic_offset_count += set_layout->binding[b].array_size; - for (gl_shader_stage s = 0; s < MESA_SHADER_STAGES; s++) { - if (set_layout->binding[b].stage[s].surface_index >= 0) - layout->stage[s].has_dynamic_offsets = true; - } - } - } - - *pPipelineLayout = anv_pipeline_layout_to_handle(layout); - - return VK_SUCCESS; -} - -void anv_DestroyPipelineLayout( - VkDevice _device, - VkPipelineLayout _pipelineLayout, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, _pipelineLayout); - - anv_free2(&device->alloc, pAllocator, pipeline_layout); -} - -/* - * Descriptor pools. These are a no-op for now. - */ - -VkResult anv_CreateDescriptorPool( - VkDevice device, - const VkDescriptorPoolCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkDescriptorPool* pDescriptorPool) -{ - anv_finishme("VkDescriptorPool is a stub"); - *pDescriptorPool = (VkDescriptorPool)1; - return VK_SUCCESS; -} - -void anv_DestroyDescriptorPool( - VkDevice _device, - VkDescriptorPool _pool, - const VkAllocationCallbacks* pAllocator) -{ - anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); -} - -VkResult anv_ResetDescriptorPool( - VkDevice device, - VkDescriptorPool descriptorPool, - VkDescriptorPoolResetFlags flags) -{ - anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); - return VK_SUCCESS; -} - -VkResult -anv_descriptor_set_create(struct anv_device *device, - const struct anv_descriptor_set_layout *layout, - struct anv_descriptor_set **out_set) -{ - struct anv_descriptor_set *set; - size_t size = sizeof(*set) + layout->size * sizeof(set->descriptors[0]); - - set = anv_alloc(&device->alloc /* XXX: Use the pool */, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!set) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - /* A descriptor set may not be 100% filled. Clear the set so we can can - * later detect holes in it. 
- */ - memset(set, 0, size); - - set->layout = layout; - - /* Go through and fill out immutable samplers if we have any */ - struct anv_descriptor *desc = set->descriptors; - for (uint32_t b = 0; b < layout->binding_count; b++) { - if (layout->binding[b].immutable_samplers) { - for (uint32_t i = 0; i < layout->binding[b].array_size; i++) - desc[i].sampler = layout->binding[b].immutable_samplers[i]; - } - desc += layout->binding[b].array_size; - } - - /* XXX: Use the pool */ - set->buffer_views = - anv_alloc(&device->alloc, - sizeof(set->buffer_views[0]) * layout->buffer_count, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!set->buffer_views) { - anv_free(&device->alloc, set); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - for (uint32_t b = 0; b < layout->buffer_count; b++) { - set->buffer_views[b].surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - } - set->buffer_count = layout->buffer_count; - *out_set = set; - - return VK_SUCCESS; -} - -void -anv_descriptor_set_destroy(struct anv_device *device, - struct anv_descriptor_set *set) -{ - /* XXX: Use the pool */ - for (uint32_t b = 0; b < set->buffer_count; b++) - anv_state_pool_free(&device->surface_state_pool, - set->buffer_views[b].surface_state); - - anv_free(&device->alloc, set->buffer_views); - anv_free(&device->alloc, set); -} - -VkResult anv_AllocateDescriptorSets( - VkDevice _device, - const VkDescriptorSetAllocateInfo* pAllocateInfo, - VkDescriptorSet* pDescriptorSets) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - VkResult result = VK_SUCCESS; - struct anv_descriptor_set *set; - uint32_t i; - - for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) { - ANV_FROM_HANDLE(anv_descriptor_set_layout, layout, - pAllocateInfo->pSetLayouts[i]); - - result = anv_descriptor_set_create(device, layout, &set); - if (result != VK_SUCCESS) - break; - - pDescriptorSets[i] = anv_descriptor_set_to_handle(set); - } - - if (result != VK_SUCCESS) - anv_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool, - i, pDescriptorSets); - - return result; -} - -VkResult anv_FreeDescriptorSets( - VkDevice _device, - VkDescriptorPool descriptorPool, - uint32_t count, - const VkDescriptorSet* pDescriptorSets) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - for (uint32_t i = 0; i < count; i++) { - ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); - - anv_descriptor_set_destroy(device, set); - } - - return VK_SUCCESS; -} - -void anv_UpdateDescriptorSets( - VkDevice _device, - uint32_t descriptorWriteCount, - const VkWriteDescriptorSet* pDescriptorWrites, - uint32_t descriptorCopyCount, - const VkCopyDescriptorSet* pDescriptorCopies) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - for (uint32_t i = 0; i < descriptorWriteCount; i++) { - const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; - ANV_FROM_HANDLE(anv_descriptor_set, set, write->dstSet); - const struct anv_descriptor_set_binding_layout *bind_layout = - &set->layout->binding[write->dstBinding]; - struct anv_descriptor *desc = - &set->descriptors[bind_layout->descriptor_index]; - desc += write->dstArrayElement; - - switch (write->descriptorType) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - for (uint32_t j = 0; j < write->descriptorCount; j++) { - ANV_FROM_HANDLE(anv_sampler, sampler, - write->pImageInfo[j].sampler); - - desc[j] = (struct anv_descriptor) { - .type = VK_DESCRIPTOR_TYPE_SAMPLER, - .sampler = sampler, - }; - } - break; - - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - for (uint32_t j = 0; j < write->descriptorCount; 
j++) { - ANV_FROM_HANDLE(anv_image_view, iview, - write->pImageInfo[j].imageView); - ANV_FROM_HANDLE(anv_sampler, sampler, - write->pImageInfo[j].sampler); - - desc[j].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - desc[j].image_view = iview; - - /* If this descriptor has an immutable sampler, we don't want - * to stomp on it. - */ - if (sampler) - desc[j].sampler = sampler; - } - break; - - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - for (uint32_t j = 0; j < write->descriptorCount; j++) { - ANV_FROM_HANDLE(anv_image_view, iview, - write->pImageInfo[j].imageView); - - desc[j] = (struct anv_descriptor) { - .type = write->descriptorType, - .image_view = iview, - }; - } - break; - - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - for (uint32_t j = 0; j < write->descriptorCount; j++) { - ANV_FROM_HANDLE(anv_buffer_view, bview, - write->pTexelBufferView[j]); - - desc[j] = (struct anv_descriptor) { - .type = write->descriptorType, - .buffer_view = bview, - }; - } - break; - - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - anv_finishme("input attachments not implemented"); - break; - - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - for (uint32_t j = 0; j < write->descriptorCount; j++) { - assert(write->pBufferInfo[j].buffer); - ANV_FROM_HANDLE(anv_buffer, buffer, write->pBufferInfo[j].buffer); - assert(buffer); - - struct anv_buffer_view *view = - &set->buffer_views[bind_layout->buffer_index]; - view += write->dstArrayElement + j; - - const struct anv_format *format = - anv_format_for_descriptor_type(write->descriptorType); - - view->format = format->isl_format; - view->bo = buffer->bo; - view->offset = buffer->offset + write->pBufferInfo[j].offset; - - /* For buffers with dynamic offsets, we use the full possible - * range in the surface state and do the actual range-checking - * in the shader. 
- */ - if (bind_layout->dynamic_offset_index >= 0 || - write->pBufferInfo[j].range == VK_WHOLE_SIZE) - view->range = buffer->size - write->pBufferInfo[j].offset; - else - view->range = write->pBufferInfo[j].range; - - anv_fill_buffer_surface_state(device, view->surface_state, - view->format, - view->offset, view->range, 1); - - desc[j] = (struct anv_descriptor) { - .type = write->descriptorType, - .buffer_view = view, - }; - - } - - default: - break; - } - } - - for (uint32_t i = 0; i < descriptorCopyCount; i++) { - const VkCopyDescriptorSet *copy = &pDescriptorCopies[i]; - ANV_FROM_HANDLE(anv_descriptor_set, src, copy->dstSet); - ANV_FROM_HANDLE(anv_descriptor_set, dst, copy->dstSet); - - const struct anv_descriptor_set_binding_layout *src_layout = - &src->layout->binding[copy->srcBinding]; - struct anv_descriptor *src_desc = - &src->descriptors[src_layout->descriptor_index]; - src_desc += copy->srcArrayElement; - - const struct anv_descriptor_set_binding_layout *dst_layout = - &dst->layout->binding[copy->dstBinding]; - struct anv_descriptor *dst_desc = - &dst->descriptors[dst_layout->descriptor_index]; - dst_desc += copy->dstArrayElement; - - for (uint32_t j = 0; j < copy->descriptorCount; j++) - dst_desc[j] = src_desc[j]; - } -} diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c deleted file mode 100644 index a8835f7..0000000 --- a/src/vulkan/anv_device.c +++ /dev/null @@ -1,1789 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" -#include "mesa/main/git_sha1.h" -#include "util/strtod.h" -#include "util/debug.h" - -#include "genxml/gen7_pack.h" - -struct anv_dispatch_table dtable; - -static void -compiler_debug_log(void *data, const char *fmt, ...) -{ } - -static void -compiler_perf_log(void *data, const char *fmt, ...) 
-{ - va_list args; - va_start(args, fmt); - - if (unlikely(INTEL_DEBUG & DEBUG_PERF)) - vfprintf(stderr, fmt, args); - - va_end(args); -} - -static VkResult -anv_physical_device_init(struct anv_physical_device *device, - struct anv_instance *instance, - const char *path) -{ - VkResult result; - int fd; - - fd = open(path, O_RDWR | O_CLOEXEC); - if (fd < 0) - return vk_errorf(VK_ERROR_INITIALIZATION_FAILED, - "failed to open %s: %m", path); - - device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; - device->instance = instance; - device->path = path; - - device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID); - if (!device->chipset_id) { - result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, - "failed to get chipset id: %m"); - goto fail; - } - - device->name = brw_get_device_name(device->chipset_id); - device->info = brw_get_device_info(device->chipset_id); - if (!device->info) { - result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, - "failed to get device info"); - goto fail; - } - - if (device->info->is_haswell) { - fprintf(stderr, "WARNING: Haswell Vulkan support is incomplete\n"); - } else if (device->info->gen == 7 && !device->info->is_baytrail) { - fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n"); - } else if (device->info->gen == 7 && device->info->is_baytrail) { - fprintf(stderr, "WARNING: Bay Trail Vulkan support is incomplete\n"); - } else if (device->info->gen >= 8) { - /* Broadwell, Cherryview, Skylake, Broxton, Kabylake is as fully - * supported as anything */ - } else { - result = vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, - "Vulkan not yet supported on %s", device->name); - goto fail; - } - - if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) { - result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, - "failed to get aperture size: %m"); - goto fail; - } - - if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) { - result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, - "kernel missing gem wait"); - goto fail; - } - - if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) { - result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, - "kernel missing execbuf2"); - goto fail; - } - - if (!device->info->has_llc && - anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) { - result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, - "kernel missing wc mmap"); - goto fail; - } - - bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X); - - close(fd); - - brw_process_intel_debug_variable(); - - device->compiler = brw_compiler_create(NULL, device->info); - if (device->compiler == NULL) { - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail; - } - device->compiler->shader_debug_log = compiler_debug_log; - device->compiler->shader_perf_log = compiler_perf_log; - - /* XXX: Actually detect bit6 swizzling */ - isl_device_init(&device->isl_dev, device->info, swizzled); - - return VK_SUCCESS; - -fail: - close(fd); - return result; -} - -static void -anv_physical_device_finish(struct anv_physical_device *device) -{ - ralloc_free(device->compiler); -} - -static const VkExtensionProperties global_extensions[] = { - { - .extensionName = VK_KHR_SURFACE_EXTENSION_NAME, - .specVersion = 25, - }, - { - .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME, - .specVersion = 5, - }, -#ifdef HAVE_WAYLAND_PLATFORM - { - .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, - .specVersion = 4, - }, -#endif -}; - -static const VkExtensionProperties device_extensions[] = { - { - .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME, - .specVersion = 67, - }, -}; - -static void * 
-default_alloc_func(void *pUserData, size_t size, size_t align, - VkSystemAllocationScope allocationScope) -{ - return malloc(size); -} - -static void * -default_realloc_func(void *pUserData, void *pOriginal, size_t size, - size_t align, VkSystemAllocationScope allocationScope) -{ - return realloc(pOriginal, size); -} - -static void -default_free_func(void *pUserData, void *pMemory) -{ - free(pMemory); -} - -static const VkAllocationCallbacks default_alloc = { - .pUserData = NULL, - .pfnAllocation = default_alloc_func, - .pfnReallocation = default_realloc_func, - .pfnFree = default_free_func, -}; - -VkResult anv_CreateInstance( - const VkInstanceCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkInstance* pInstance) -{ - struct anv_instance *instance; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); - - uint32_t client_version = pCreateInfo->pApplicationInfo ? - pCreateInfo->pApplicationInfo->apiVersion : - VK_MAKE_VERSION(1, 0, 0); - if (VK_MAKE_VERSION(1, 0, 0) > client_version || - client_version > VK_MAKE_VERSION(1, 0, 3)) { - return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, - "Client requested version %d.%d.%d", - VK_VERSION_MAJOR(client_version), - VK_VERSION_MINOR(client_version), - VK_VERSION_PATCH(client_version)); - } - - for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { - bool found = false; - for (uint32_t j = 0; j < ARRAY_SIZE(global_extensions); j++) { - if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], - global_extensions[j].extensionName) == 0) { - found = true; - break; - } - } - if (!found) - return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); - } - - instance = anv_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8, - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); - if (!instance) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC; - - if (pAllocator) - instance->alloc = *pAllocator; - else - instance->alloc = default_alloc; - - instance->apiVersion = client_version; - instance->physicalDeviceCount = -1; - - _mesa_locale_init(); - - VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); - - anv_init_wsi(instance); - - *pInstance = anv_instance_to_handle(instance); - - return VK_SUCCESS; -} - -void anv_DestroyInstance( - VkInstance _instance, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_instance, instance, _instance); - - if (instance->physicalDeviceCount > 0) { - /* We support at most one physical device. */ - assert(instance->physicalDeviceCount == 1); - anv_physical_device_finish(&instance->physicalDevice); - } - - anv_finish_wsi(instance); - - VG(VALGRIND_DESTROY_MEMPOOL(instance)); - - _mesa_locale_fini(); - - anv_free(&instance->alloc, instance); -} - -VkResult anv_EnumeratePhysicalDevices( - VkInstance _instance, - uint32_t* pPhysicalDeviceCount, - VkPhysicalDevice* pPhysicalDevices) -{ - ANV_FROM_HANDLE(anv_instance, instance, _instance); - VkResult result; - - if (instance->physicalDeviceCount < 0) { - result = anv_physical_device_init(&instance->physicalDevice, - instance, "/dev/dri/renderD128"); - if (result == VK_ERROR_INCOMPATIBLE_DRIVER) { - instance->physicalDeviceCount = 0; - } else if (result == VK_SUCCESS) { - instance->physicalDeviceCount = 1; - } else { - return result; - } - } - - /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL; - * otherwise it's an inout parameter. 
- * - * The Vulkan spec (git aaed022) says: - * - * pPhysicalDeviceCount is a pointer to an unsigned integer variable - * that is initialized with the number of devices the application is - * prepared to receive handles to. pname:pPhysicalDevices is pointer to - * an array of at least this many VkPhysicalDevice handles [...]. - * - * Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices - * overwrites the contents of the variable pointed to by - * pPhysicalDeviceCount with the number of physical devices in in the - * instance; otherwise, vkEnumeratePhysicalDevices overwrites - * pPhysicalDeviceCount with the number of physical handles written to - * pPhysicalDevices. - */ - if (!pPhysicalDevices) { - *pPhysicalDeviceCount = instance->physicalDeviceCount; - } else if (*pPhysicalDeviceCount >= 1) { - pPhysicalDevices[0] = anv_physical_device_to_handle(&instance->physicalDevice); - *pPhysicalDeviceCount = 1; - } else { - *pPhysicalDeviceCount = 0; - } - - return VK_SUCCESS; -} - -void anv_GetPhysicalDeviceFeatures( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceFeatures* pFeatures) -{ - ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); - - *pFeatures = (VkPhysicalDeviceFeatures) { - .robustBufferAccess = true, - .fullDrawIndexUint32 = true, - .imageCubeArray = false, - .independentBlend = pdevice->info->gen >= 8, - .geometryShader = true, - .tessellationShader = false, - .sampleRateShading = false, - .dualSrcBlend = true, - .logicOp = true, - .multiDrawIndirect = false, - .drawIndirectFirstInstance = false, - .depthClamp = false, - .depthBiasClamp = false, - .fillModeNonSolid = true, - .depthBounds = false, - .wideLines = true, - .largePoints = true, - .alphaToOne = true, - .multiViewport = true, - .samplerAnisotropy = false, /* FINISHME */ - .textureCompressionETC2 = true, - .textureCompressionASTC_LDR = true, - .textureCompressionBC = true, - .occlusionQueryPrecise = true, - .pipelineStatisticsQuery = true, - .vertexPipelineStoresAndAtomics = pdevice->info->gen >= 8, - .fragmentStoresAndAtomics = true, - .shaderTessellationAndGeometryPointSize = true, - .shaderImageGatherExtended = true, - .shaderStorageImageExtendedFormats = false, - .shaderStorageImageMultisample = false, - .shaderUniformBufferArrayDynamicIndexing = true, - .shaderSampledImageArrayDynamicIndexing = true, - .shaderStorageBufferArrayDynamicIndexing = true, - .shaderStorageImageArrayDynamicIndexing = true, - .shaderStorageImageReadWithoutFormat = false, - .shaderStorageImageWriteWithoutFormat = true, - .shaderClipDistance = false, - .shaderCullDistance = false, - .shaderFloat64 = false, - .shaderInt64 = false, - .shaderInt16 = false, - .alphaToOne = true, - .variableMultisampleRate = false, - .inheritedQueries = false, - }; -} - -void -anv_device_get_cache_uuid(void *uuid) -{ - memset(uuid, 0, VK_UUID_SIZE); - snprintf(uuid, VK_UUID_SIZE, "anv-%s", MESA_GIT_SHA1 + 4); -} - -void anv_GetPhysicalDeviceProperties( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceProperties* pProperties) -{ - ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); - const struct brw_device_info *devinfo = pdevice->info; - - anv_finishme("Get correct values for VkPhysicalDeviceLimits"); - - const float time_stamp_base = devinfo->gen >= 9 ? 
83.333 : 80.0; - - VkSampleCountFlags sample_counts = - isl_device_get_sample_counts(&pdevice->isl_dev); - - VkPhysicalDeviceLimits limits = { - .maxImageDimension1D = (1 << 14), - .maxImageDimension2D = (1 << 14), - .maxImageDimension3D = (1 << 10), - .maxImageDimensionCube = (1 << 14), - .maxImageArrayLayers = (1 << 10), - .maxTexelBufferElements = 128 * 1024 * 1024, - .maxUniformBufferRange = UINT32_MAX, - .maxStorageBufferRange = UINT32_MAX, - .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE, - .maxMemoryAllocationCount = UINT32_MAX, - .maxSamplerAllocationCount = 64 * 1024, - .bufferImageGranularity = 64, /* A cache line */ - .sparseAddressSpaceSize = 0, - .maxBoundDescriptorSets = MAX_SETS, - .maxPerStageDescriptorSamplers = 64, - .maxPerStageDescriptorUniformBuffers = 64, - .maxPerStageDescriptorStorageBuffers = 64, - .maxPerStageDescriptorSampledImages = 64, - .maxPerStageDescriptorStorageImages = 64, - .maxPerStageDescriptorInputAttachments = 64, - .maxPerStageResources = 128, - .maxDescriptorSetSamplers = 256, - .maxDescriptorSetUniformBuffers = 256, - .maxDescriptorSetUniformBuffersDynamic = 256, - .maxDescriptorSetStorageBuffers = 256, - .maxDescriptorSetStorageBuffersDynamic = 256, - .maxDescriptorSetSampledImages = 256, - .maxDescriptorSetStorageImages = 256, - .maxDescriptorSetInputAttachments = 256, - .maxVertexInputAttributes = 32, - .maxVertexInputBindings = 32, - .maxVertexInputAttributeOffset = 2047, - .maxVertexInputBindingStride = 2048, - .maxVertexOutputComponents = 128, - .maxTessellationGenerationLevel = 0, - .maxTessellationPatchSize = 0, - .maxTessellationControlPerVertexInputComponents = 0, - .maxTessellationControlPerVertexOutputComponents = 0, - .maxTessellationControlPerPatchOutputComponents = 0, - .maxTessellationControlTotalOutputComponents = 0, - .maxTessellationEvaluationInputComponents = 0, - .maxTessellationEvaluationOutputComponents = 0, - .maxGeometryShaderInvocations = 32, - .maxGeometryInputComponents = 64, - .maxGeometryOutputComponents = 128, - .maxGeometryOutputVertices = 256, - .maxGeometryTotalOutputComponents = 1024, - .maxFragmentInputComponents = 128, - .maxFragmentOutputAttachments = 8, - .maxFragmentDualSrcAttachments = 2, - .maxFragmentCombinedOutputResources = 8, - .maxComputeSharedMemorySize = 32768, - .maxComputeWorkGroupCount = { 65535, 65535, 65535 }, - .maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads, - .maxComputeWorkGroupSize = { - 16 * devinfo->max_cs_threads, - 16 * devinfo->max_cs_threads, - 16 * devinfo->max_cs_threads, - }, - .subPixelPrecisionBits = 4 /* FIXME */, - .subTexelPrecisionBits = 4 /* FIXME */, - .mipmapPrecisionBits = 4 /* FIXME */, - .maxDrawIndexedIndexValue = UINT32_MAX, - .maxDrawIndirectCount = UINT32_MAX, - .maxSamplerLodBias = 16, - .maxSamplerAnisotropy = 16, - .maxViewports = MAX_VIEWPORTS, - .maxViewportDimensions = { (1 << 14), (1 << 14) }, - .viewportBoundsRange = { -16384.0, 16384.0 }, - .viewportSubPixelBits = 13, /* We take a float? 
*/ - .minMemoryMapAlignment = 4096, /* A page */ - .minTexelBufferOffsetAlignment = 1, - .minUniformBufferOffsetAlignment = 1, - .minStorageBufferOffsetAlignment = 1, - .minTexelOffset = -8, - .maxTexelOffset = 7, - .minTexelGatherOffset = -8, - .maxTexelGatherOffset = 7, - .minInterpolationOffset = 0, /* FIXME */ - .maxInterpolationOffset = 0, /* FIXME */ - .subPixelInterpolationOffsetBits = 0, /* FIXME */ - .maxFramebufferWidth = (1 << 14), - .maxFramebufferHeight = (1 << 14), - .maxFramebufferLayers = (1 << 10), - .framebufferColorSampleCounts = sample_counts, - .framebufferDepthSampleCounts = sample_counts, - .framebufferStencilSampleCounts = sample_counts, - .framebufferNoAttachmentsSampleCounts = sample_counts, - .maxColorAttachments = MAX_RTS, - .sampledImageColorSampleCounts = sample_counts, - .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT, - .sampledImageDepthSampleCounts = sample_counts, - .sampledImageStencilSampleCounts = sample_counts, - .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, - .maxSampleMaskWords = 1, - .timestampComputeAndGraphics = false, - .timestampPeriod = time_stamp_base / (1000 * 1000 * 1000), - .maxClipDistances = 0 /* FIXME */, - .maxCullDistances = 0 /* FIXME */, - .maxCombinedClipAndCullDistances = 0 /* FIXME */, - .discreteQueuePriorities = 1, - .pointSizeRange = { 0.125, 255.875 }, - .lineWidthRange = { 0.0, 7.9921875 }, - .pointSizeGranularity = (1.0 / 8.0), - .lineWidthGranularity = (1.0 / 128.0), - .strictLines = false, /* FINISHME */ - .standardSampleLocations = true, - .optimalBufferCopyOffsetAlignment = 128, - .optimalBufferCopyRowPitchAlignment = 128, - .nonCoherentAtomSize = 64, - }; - - *pProperties = (VkPhysicalDeviceProperties) { - .apiVersion = VK_MAKE_VERSION(1, 0, 2), - .driverVersion = 1, - .vendorID = 0x8086, - .deviceID = pdevice->chipset_id, - .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, - .limits = limits, - .sparseProperties = {0}, /* Broadwell doesn't do sparse. */ - }; - - strcpy(pProperties->deviceName, pdevice->name); - anv_device_get_cache_uuid(pProperties->pipelineCacheUUID); -} - -void anv_GetPhysicalDeviceQueueFamilyProperties( - VkPhysicalDevice physicalDevice, - uint32_t* pCount, - VkQueueFamilyProperties* pQueueFamilyProperties) -{ - if (pQueueFamilyProperties == NULL) { - *pCount = 1; - return; - } - - assert(*pCount >= 1); - - *pQueueFamilyProperties = (VkQueueFamilyProperties) { - .queueFlags = VK_QUEUE_GRAPHICS_BIT | - VK_QUEUE_COMPUTE_BIT | - VK_QUEUE_TRANSFER_BIT, - .queueCount = 1, - .timestampValidBits = 36, /* XXX: Real value here */ - .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 }, - }; -} - -void anv_GetPhysicalDeviceMemoryProperties( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceMemoryProperties* pMemoryProperties) -{ - ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); - VkDeviceSize heap_size; - - /* Reserve some wiggle room for the driver by exposing only 75% of the - * aperture to the heap. - */ - heap_size = 3 * physical_device->aperture_size / 4; - - if (physical_device->info->has_llc) { - /* Big core GPUs share LLC with the CPU and thus one memory type can be - * both cached and coherent at the same time. 
- */ - pMemoryProperties->memoryTypeCount = 1; - pMemoryProperties->memoryTypes[0] = (VkMemoryType) { - .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT, - .heapIndex = 0, - }; - } else { - /* The spec requires that we expose a host-visible, coherent memory - * type, but Atom GPUs don't share LLC. Thus we offer two memory types - * to give the application a choice between cached, but not coherent and - * coherent but uncached (WC though). - */ - pMemoryProperties->memoryTypeCount = 2; - pMemoryProperties->memoryTypes[0] = (VkMemoryType) { - .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, - .heapIndex = 0, - }; - pMemoryProperties->memoryTypes[1] = (VkMemoryType) { - .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT, - .heapIndex = 0, - }; - } - - pMemoryProperties->memoryHeapCount = 1; - pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) { - .size = heap_size, - .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, - }; -} - -PFN_vkVoidFunction anv_GetInstanceProcAddr( - VkInstance instance, - const char* pName) -{ - return anv_lookup_entrypoint(pName); -} - -/* The loader wants us to expose a second GetInstanceProcAddr function - * to work around certain LD_PRELOAD issues seen in apps. - */ -VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( - VkInstance instance, - const char* pName); - -VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( - VkInstance instance, - const char* pName) -{ - return anv_GetInstanceProcAddr(instance, pName); -} - -PFN_vkVoidFunction anv_GetDeviceProcAddr( - VkDevice device, - const char* pName) -{ - return anv_lookup_entrypoint(pName); -} - -static VkResult -anv_queue_init(struct anv_device *device, struct anv_queue *queue) -{ - queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC; - queue->device = device; - queue->pool = &device->surface_state_pool; - - return VK_SUCCESS; -} - -static void -anv_queue_finish(struct anv_queue *queue) -{ -} - -static struct anv_state -anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p) -{ - struct anv_state state; - - state = anv_state_pool_alloc(pool, size, align); - memcpy(state.map, p, size); - - if (!pool->block_pool->device->info.has_llc) - anv_state_clflush(state); - - return state; -} - -struct gen8_border_color { - union { - float float32[4]; - uint32_t uint32[4]; - }; - /* Pad out to 64 bytes */ - uint32_t _pad[12]; -}; - -static void -anv_device_init_border_colors(struct anv_device *device) -{ - static const struct gen8_border_color border_colors[] = { - [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } }, - [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } }, - [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } }, - [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } }, - [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } }, - [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } }, - }; - - device->border_colors = anv_state_pool_emit_data(&device->dynamic_state_pool, - sizeof(border_colors), 64, - border_colors); -} - -VkResult -anv_device_submit_simple_batch(struct anv_device *device, - struct anv_batch *batch) -{ - struct drm_i915_gem_execbuffer2 
execbuf; - struct drm_i915_gem_exec_object2 exec2_objects[1]; - struct anv_bo bo; - VkResult result = VK_SUCCESS; - uint32_t size; - int64_t timeout; - int ret; - - /* Kernel driver requires 8 byte aligned batch length */ - size = align_u32(batch->next - batch->start, 8); - assert(size < device->batch_bo_pool.bo_size); - result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo); - if (result != VK_SUCCESS) - return result; - - memcpy(bo.map, batch->start, size); - if (!device->info.has_llc) - anv_clflush_range(bo.map, size); - - exec2_objects[0].handle = bo.gem_handle; - exec2_objects[0].relocation_count = 0; - exec2_objects[0].relocs_ptr = 0; - exec2_objects[0].alignment = 0; - exec2_objects[0].offset = bo.offset; - exec2_objects[0].flags = 0; - exec2_objects[0].rsvd1 = 0; - exec2_objects[0].rsvd2 = 0; - - execbuf.buffers_ptr = (uintptr_t) exec2_objects; - execbuf.buffer_count = 1; - execbuf.batch_start_offset = 0; - execbuf.batch_len = size; - execbuf.cliprects_ptr = 0; - execbuf.num_cliprects = 0; - execbuf.DR1 = 0; - execbuf.DR4 = 0; - - execbuf.flags = - I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; - execbuf.rsvd1 = device->context_id; - execbuf.rsvd2 = 0; - - ret = anv_gem_execbuffer(device, &execbuf); - if (ret != 0) { - /* We don't know the real error. */ - result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m"); - goto fail; - } - - timeout = INT64_MAX; - ret = anv_gem_wait(device, bo.gem_handle, &timeout); - if (ret != 0) { - /* We don't know the real error. */ - result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m"); - goto fail; - } - - fail: - anv_bo_pool_free(&device->batch_bo_pool, &bo); - - return result; -} - -VkResult anv_CreateDevice( - VkPhysicalDevice physicalDevice, - const VkDeviceCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkDevice* pDevice) -{ - ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); - VkResult result; - struct anv_device *device; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); - - for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { - bool found = false; - for (uint32_t j = 0; j < ARRAY_SIZE(device_extensions); j++) { - if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], - device_extensions[j].extensionName) == 0) { - found = true; - break; - } - } - if (!found) - return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); - } - - anv_set_dispatch_devinfo(physical_device->info); - - device = anv_alloc2(&physical_device->instance->alloc, pAllocator, - sizeof(*device), 8, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - if (!device) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; - device->instance = physical_device->instance; - device->chipset_id = physical_device->chipset_id; - - if (pAllocator) - device->alloc = *pAllocator; - else - device->alloc = physical_device->instance->alloc; - - /* XXX(chadv): Can we dup() physicalDevice->fd here? 
*/ - device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC); - if (device->fd == -1) { - result = vk_error(VK_ERROR_INITIALIZATION_FAILED); - goto fail_device; - } - - device->context_id = anv_gem_create_context(device); - if (device->context_id == -1) { - result = vk_error(VK_ERROR_INITIALIZATION_FAILED); - goto fail_fd; - } - - device->info = *physical_device->info; - device->isl_dev = physical_device->isl_dev; - - pthread_mutex_init(&device->mutex, NULL); - - anv_bo_pool_init(&device->batch_bo_pool, device, ANV_CMD_BUFFER_BATCH_SIZE); - - anv_block_pool_init(&device->dynamic_state_block_pool, device, 16384); - - anv_state_pool_init(&device->dynamic_state_pool, - &device->dynamic_state_block_pool); - - anv_block_pool_init(&device->instruction_block_pool, device, 128 * 1024); - anv_pipeline_cache_init(&device->default_pipeline_cache, device); - - anv_block_pool_init(&device->surface_state_block_pool, device, 4096); - - anv_state_pool_init(&device->surface_state_pool, - &device->surface_state_block_pool); - - anv_bo_init_new(&device->workaround_bo, device, 1024); - - anv_block_pool_init(&device->scratch_block_pool, device, 0x10000); - - anv_queue_init(device, &device->queue); - - switch (device->info.gen) { - case 7: - if (!device->info.is_haswell) - result = gen7_init_device_state(device); - else - result = gen75_init_device_state(device); - break; - case 8: - result = gen8_init_device_state(device); - break; - case 9: - result = gen9_init_device_state(device); - break; - default: - /* Shouldn't get here as we don't create physical devices for any other - * gens. */ - unreachable("unhandled gen"); - } - if (result != VK_SUCCESS) - goto fail_fd; - - result = anv_device_init_meta(device); - if (result != VK_SUCCESS) - goto fail_fd; - - anv_device_init_border_colors(device); - - *pDevice = anv_device_to_handle(device); - - return VK_SUCCESS; - - fail_fd: - close(device->fd); - fail_device: - anv_free(&device->alloc, device); - - return result; -} - -void anv_DestroyDevice( - VkDevice _device, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - anv_queue_finish(&device->queue); - - anv_device_finish_meta(device); - -#ifdef HAVE_VALGRIND - /* We only need to free these to prevent valgrind errors. The backing - * BO will go away in a couple of lines so we don't actually leak. 
- */ - anv_state_pool_free(&device->dynamic_state_pool, device->border_colors); -#endif - - anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size); - anv_gem_close(device, device->workaround_bo.gem_handle); - - anv_bo_pool_finish(&device->batch_bo_pool); - anv_state_pool_finish(&device->dynamic_state_pool); - anv_block_pool_finish(&device->dynamic_state_block_pool); - anv_block_pool_finish(&device->instruction_block_pool); - anv_state_pool_finish(&device->surface_state_pool); - anv_block_pool_finish(&device->surface_state_block_pool); - anv_block_pool_finish(&device->scratch_block_pool); - - close(device->fd); - - pthread_mutex_destroy(&device->mutex); - - anv_free(&device->alloc, device); -} - -VkResult anv_EnumerateInstanceExtensionProperties( - const char* pLayerName, - uint32_t* pPropertyCount, - VkExtensionProperties* pProperties) -{ - if (pProperties == NULL) { - *pPropertyCount = ARRAY_SIZE(global_extensions); - return VK_SUCCESS; - } - - assert(*pPropertyCount >= ARRAY_SIZE(global_extensions)); - - *pPropertyCount = ARRAY_SIZE(global_extensions); - memcpy(pProperties, global_extensions, sizeof(global_extensions)); - - return VK_SUCCESS; -} - -VkResult anv_EnumerateDeviceExtensionProperties( - VkPhysicalDevice physicalDevice, - const char* pLayerName, - uint32_t* pPropertyCount, - VkExtensionProperties* pProperties) -{ - if (pProperties == NULL) { - *pPropertyCount = ARRAY_SIZE(device_extensions); - return VK_SUCCESS; - } - - assert(*pPropertyCount >= ARRAY_SIZE(device_extensions)); - - *pPropertyCount = ARRAY_SIZE(device_extensions); - memcpy(pProperties, device_extensions, sizeof(device_extensions)); - - return VK_SUCCESS; -} - -VkResult anv_EnumerateInstanceLayerProperties( - uint32_t* pPropertyCount, - VkLayerProperties* pProperties) -{ - if (pProperties == NULL) { - *pPropertyCount = 0; - return VK_SUCCESS; - } - - /* None supported at this time */ - return vk_error(VK_ERROR_LAYER_NOT_PRESENT); -} - -VkResult anv_EnumerateDeviceLayerProperties( - VkPhysicalDevice physicalDevice, - uint32_t* pPropertyCount, - VkLayerProperties* pProperties) -{ - if (pProperties == NULL) { - *pPropertyCount = 0; - return VK_SUCCESS; - } - - /* None supported at this time */ - return vk_error(VK_ERROR_LAYER_NOT_PRESENT); -} - -void anv_GetDeviceQueue( - VkDevice _device, - uint32_t queueNodeIndex, - uint32_t queueIndex, - VkQueue* pQueue) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - assert(queueIndex == 0); - - *pQueue = anv_queue_to_handle(&device->queue); -} - -VkResult anv_QueueSubmit( - VkQueue _queue, - uint32_t submitCount, - const VkSubmitInfo* pSubmits, - VkFence _fence) -{ - ANV_FROM_HANDLE(anv_queue, queue, _queue); - ANV_FROM_HANDLE(anv_fence, fence, _fence); - struct anv_device *device = queue->device; - int ret; - - for (uint32_t i = 0; i < submitCount; i++) { - for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, - pSubmits[i].pCommandBuffers[j]); - assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); - - ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf2.execbuf); - if (ret != 0) { - /* We don't know the real error. */ - return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, - "execbuf2 failed: %m"); - } - - for (uint32_t k = 0; k < cmd_buffer->execbuf2.bo_count; k++) - cmd_buffer->execbuf2.bos[k]->offset = cmd_buffer->execbuf2.objects[k].offset; - } - } - - if (fence) { - ret = anv_gem_execbuffer(device, &fence->execbuf); - if (ret != 0) { - /* We don't know the real error. 
*/ - return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, - "execbuf2 failed: %m"); - } - } - - return VK_SUCCESS; -} - -VkResult anv_QueueWaitIdle( - VkQueue _queue) -{ - ANV_FROM_HANDLE(anv_queue, queue, _queue); - - return ANV_CALL(DeviceWaitIdle)(anv_device_to_handle(queue->device)); -} - -VkResult anv_DeviceWaitIdle( - VkDevice _device) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_batch batch; - - uint32_t cmds[8]; - batch.start = batch.next = cmds; - batch.end = (void *) cmds + sizeof(cmds); - - anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END); - anv_batch_emit(&batch, GEN7_MI_NOOP); - - return anv_device_submit_simple_batch(device, &batch); -} - -VkResult -anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size) -{ - bo->gem_handle = anv_gem_create(device, size); - if (!bo->gem_handle) - return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); - - bo->map = NULL; - bo->index = 0; - bo->offset = 0; - bo->size = size; - bo->is_winsys_bo = false; - - return VK_SUCCESS; -} - -VkResult anv_AllocateMemory( - VkDevice _device, - const VkMemoryAllocateInfo* pAllocateInfo, - const VkAllocationCallbacks* pAllocator, - VkDeviceMemory* pMem) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_device_memory *mem; - VkResult result; - - assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); - - if (pAllocateInfo->allocationSize == 0) { - /* Apparently, this is allowed */ - *pMem = VK_NULL_HANDLE; - return VK_SUCCESS; - } - - /* We support exactly one memory heap. */ - assert(pAllocateInfo->memoryTypeIndex == 0 || - (!device->info.has_llc && pAllocateInfo->memoryTypeIndex < 2)); - - /* FINISHME: Fail if allocation request exceeds heap size. */ - - mem = anv_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (mem == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - /* The kernel is going to give us whole pages anyway */ - uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096); - - result = anv_bo_init_new(&mem->bo, device, alloc_size); - if (result != VK_SUCCESS) - goto fail; - - mem->type_index = pAllocateInfo->memoryTypeIndex; - - *pMem = anv_device_memory_to_handle(mem); - - return VK_SUCCESS; - - fail: - anv_free2(&device->alloc, pAllocator, mem); - - return result; -} - -void anv_FreeMemory( - VkDevice _device, - VkDeviceMemory _mem, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_device_memory, mem, _mem); - - if (mem == NULL) - return; - - if (mem->bo.map) - anv_gem_munmap(mem->bo.map, mem->bo.size); - - if (mem->bo.gem_handle != 0) - anv_gem_close(device, mem->bo.gem_handle); - - anv_free2(&device->alloc, pAllocator, mem); -} - -VkResult anv_MapMemory( - VkDevice _device, - VkDeviceMemory _memory, - VkDeviceSize offset, - VkDeviceSize size, - VkMemoryMapFlags flags, - void** ppData) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_device_memory, mem, _memory); - - if (mem == NULL) { - *ppData = NULL; - return VK_SUCCESS; - } - - if (size == VK_WHOLE_SIZE) - size = mem->bo.size - offset; - - /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only - * takes a VkDeviceMemory pointer, it seems like only one map of the memory - * at a time is valid. We could just mmap up front and return an offset - * pointer here, but that may exhaust virtual memory on 32 bit - * userspace. 
*/ - - uint32_t gem_flags = 0; - if (!device->info.has_llc && mem->type_index == 0) - gem_flags |= I915_MMAP_WC; - - /* GEM will fail to map if the offset isn't 4k-aligned. Round down. */ - uint64_t map_offset = offset & ~4095ull; - assert(offset >= map_offset); - uint64_t map_size = (offset + size) - map_offset; - - /* Let's map whole pages */ - map_size = align_u64(map_size, 4096); - - mem->map = anv_gem_mmap(device, mem->bo.gem_handle, - map_offset, map_size, gem_flags); - mem->map_size = map_size; - - *ppData = mem->map + (offset - map_offset); - - return VK_SUCCESS; -} - -void anv_UnmapMemory( - VkDevice _device, - VkDeviceMemory _memory) -{ - ANV_FROM_HANDLE(anv_device_memory, mem, _memory); - - if (mem == NULL) - return; - - anv_gem_munmap(mem->map, mem->map_size); -} - -static void -clflush_mapped_ranges(struct anv_device *device, - uint32_t count, - const VkMappedMemoryRange *ranges) -{ - for (uint32_t i = 0; i < count; i++) { - ANV_FROM_HANDLE(anv_device_memory, mem, ranges[i].memory); - void *p = mem->map + (ranges[i].offset & ~CACHELINE_MASK); - void *end; - - if (ranges[i].offset + ranges[i].size > mem->map_size) - end = mem->map + mem->map_size; - else - end = mem->map + ranges[i].offset + ranges[i].size; - - while (p < end) { - __builtin_ia32_clflush(p); - p += CACHELINE_SIZE; - } - } -} - -VkResult anv_FlushMappedMemoryRanges( - VkDevice _device, - uint32_t memoryRangeCount, - const VkMappedMemoryRange* pMemoryRanges) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - if (device->info.has_llc) - return VK_SUCCESS; - - /* Make sure the writes we're flushing have landed. */ - __builtin_ia32_mfence(); - - clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges); - - return VK_SUCCESS; -} - -VkResult anv_InvalidateMappedMemoryRanges( - VkDevice _device, - uint32_t memoryRangeCount, - const VkMappedMemoryRange* pMemoryRanges) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - if (device->info.has_llc) - return VK_SUCCESS; - - clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges); - - /* Make sure no reads get moved up above the invalidate. */ - __builtin_ia32_mfence(); - - return VK_SUCCESS; -} - -void anv_GetBufferMemoryRequirements( - VkDevice device, - VkBuffer _buffer, - VkMemoryRequirements* pMemoryRequirements) -{ - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - - /* The Vulkan spec (git aaed022) says: - * - * memoryTypeBits is a bitfield and contains one bit set for every - * supported memory type for the resource. The bit `1<memoryTypeBits = 1; - - pMemoryRequirements->size = buffer->size; - pMemoryRequirements->alignment = 16; -} - -void anv_GetImageMemoryRequirements( - VkDevice device, - VkImage _image, - VkMemoryRequirements* pMemoryRequirements) -{ - ANV_FROM_HANDLE(anv_image, image, _image); - - /* The Vulkan spec (git aaed022) says: - * - * memoryTypeBits is a bitfield and contains one bit set for every - * supported memory type for the resource. 
The bit `1<memoryTypeBits = 1; - - pMemoryRequirements->size = image->size; - pMemoryRequirements->alignment = image->alignment; -} - -void anv_GetImageSparseMemoryRequirements( - VkDevice device, - VkImage image, - uint32_t* pSparseMemoryRequirementCount, - VkSparseImageMemoryRequirements* pSparseMemoryRequirements) -{ - stub(); -} - -void anv_GetDeviceMemoryCommitment( - VkDevice device, - VkDeviceMemory memory, - VkDeviceSize* pCommittedMemoryInBytes) -{ - *pCommittedMemoryInBytes = 0; -} - -VkResult anv_BindBufferMemory( - VkDevice device, - VkBuffer _buffer, - VkDeviceMemory _memory, - VkDeviceSize memoryOffset) -{ - ANV_FROM_HANDLE(anv_device_memory, mem, _memory); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - - if (mem) { - buffer->bo = &mem->bo; - buffer->offset = memoryOffset; - } else { - buffer->bo = NULL; - buffer->offset = 0; - } - - return VK_SUCCESS; -} - -VkResult anv_BindImageMemory( - VkDevice device, - VkImage _image, - VkDeviceMemory _memory, - VkDeviceSize memoryOffset) -{ - ANV_FROM_HANDLE(anv_device_memory, mem, _memory); - ANV_FROM_HANDLE(anv_image, image, _image); - - if (mem) { - image->bo = &mem->bo; - image->offset = memoryOffset; - } else { - image->bo = NULL; - image->offset = 0; - } - - return VK_SUCCESS; -} - -VkResult anv_QueueBindSparse( - VkQueue queue, - uint32_t bindInfoCount, - const VkBindSparseInfo* pBindInfo, - VkFence fence) -{ - stub_return(VK_ERROR_INCOMPATIBLE_DRIVER); -} - -VkResult anv_CreateFence( - VkDevice _device, - const VkFenceCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkFence* pFence) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_fence *fence; - struct anv_batch batch; - VkResult result; - - const uint32_t fence_size = 128; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO); - - fence = anv_alloc2(&device->alloc, pAllocator, sizeof(*fence), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (fence == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - result = anv_bo_init_new(&fence->bo, device, fence_size); - if (result != VK_SUCCESS) - goto fail; - - fence->bo.map = - anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size, 0); - batch.next = batch.start = fence->bo.map; - batch.end = fence->bo.map + fence->bo.size; - anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END); - anv_batch_emit(&batch, GEN7_MI_NOOP); - - if (!device->info.has_llc) { - assert(((uintptr_t) fence->bo.map & CACHELINE_MASK) == 0); - assert(batch.next - fence->bo.map <= CACHELINE_SIZE); - __builtin_ia32_mfence(); - __builtin_ia32_clflush(fence->bo.map); - } - - fence->exec2_objects[0].handle = fence->bo.gem_handle; - fence->exec2_objects[0].relocation_count = 0; - fence->exec2_objects[0].relocs_ptr = 0; - fence->exec2_objects[0].alignment = 0; - fence->exec2_objects[0].offset = fence->bo.offset; - fence->exec2_objects[0].flags = 0; - fence->exec2_objects[0].rsvd1 = 0; - fence->exec2_objects[0].rsvd2 = 0; - - fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects; - fence->execbuf.buffer_count = 1; - fence->execbuf.batch_start_offset = 0; - fence->execbuf.batch_len = batch.next - fence->bo.map; - fence->execbuf.cliprects_ptr = 0; - fence->execbuf.num_cliprects = 0; - fence->execbuf.DR1 = 0; - fence->execbuf.DR4 = 0; - - fence->execbuf.flags = - I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; - fence->execbuf.rsvd1 = device->context_id; - fence->execbuf.rsvd2 = 0; - - fence->ready = false; - - *pFence = anv_fence_to_handle(fence); - - return VK_SUCCESS; - - fail: - 
anv_free2(&device->alloc, pAllocator, fence); - - return result; -} - -void anv_DestroyFence( - VkDevice _device, - VkFence _fence, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_fence, fence, _fence); - - anv_gem_munmap(fence->bo.map, fence->bo.size); - anv_gem_close(device, fence->bo.gem_handle); - anv_free2(&device->alloc, pAllocator, fence); -} - -VkResult anv_ResetFences( - VkDevice _device, - uint32_t fenceCount, - const VkFence* pFences) -{ - for (uint32_t i = 0; i < fenceCount; i++) { - ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); - fence->ready = false; - } - - return VK_SUCCESS; -} - -VkResult anv_GetFenceStatus( - VkDevice _device, - VkFence _fence) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_fence, fence, _fence); - int64_t t = 0; - int ret; - - if (fence->ready) - return VK_SUCCESS; - - ret = anv_gem_wait(device, fence->bo.gem_handle, &t); - if (ret == 0) { - fence->ready = true; - return VK_SUCCESS; - } - - return VK_NOT_READY; -} - -VkResult anv_WaitForFences( - VkDevice _device, - uint32_t fenceCount, - const VkFence* pFences, - VkBool32 waitAll, - uint64_t timeout) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed - * to block indefinitely timeouts <= 0. Unfortunately, this was broken - * for a couple of kernel releases. Since there's no way to know - * whether or not the kernel we're using is one of the broken ones, the - * best we can do is to clamp the timeout to INT64_MAX. This limits the - * maximum timeout from 584 years to 292 years - likely not a big deal. - */ - if (timeout > INT64_MAX) - timeout = INT64_MAX; - - int64_t t = timeout; - - /* FIXME: handle !waitAll */ - - for (uint32_t i = 0; i < fenceCount; i++) { - ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); - int ret = anv_gem_wait(device, fence->bo.gem_handle, &t); - if (ret == -1 && errno == ETIME) { - return VK_TIMEOUT; - } else if (ret == -1) { - /* We don't know the real error. */ - return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, - "gem wait failed: %m"); - } - } - - return VK_SUCCESS; -} - -// Queue semaphore functions - -VkResult anv_CreateSemaphore( - VkDevice device, - const VkSemaphoreCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSemaphore* pSemaphore) -{ - /* The DRM execbuffer ioctl always execute in-oder, even between different - * rings. As such, there's nothing to do for the user space semaphore. - */ - - *pSemaphore = (VkSemaphore)1; - - return VK_SUCCESS; -} - -void anv_DestroySemaphore( - VkDevice device, - VkSemaphore semaphore, - const VkAllocationCallbacks* pAllocator) -{ -} - -// Event functions - -VkResult anv_CreateEvent( - VkDevice _device, - const VkEventCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkEvent* pEvent) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_state state; - struct anv_event *event; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO); - - state = anv_state_pool_alloc(&device->dynamic_state_pool, - sizeof(*event), 8); - event = state.map; - event->state = state; - event->semaphore = VK_EVENT_RESET; - - if (!device->info.has_llc) { - /* Make sure the writes we're flushing have landed. 
*/ - __builtin_ia32_mfence(); - __builtin_ia32_clflush(event); - } - - *pEvent = anv_event_to_handle(event); - - return VK_SUCCESS; -} - -void anv_DestroyEvent( - VkDevice _device, - VkEvent _event, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_event, event, _event); - - anv_state_pool_free(&device->dynamic_state_pool, event->state); -} - -VkResult anv_GetEventStatus( - VkDevice _device, - VkEvent _event) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_event, event, _event); - - if (!device->info.has_llc) { - /* Invalidate read cache before reading event written by GPU. */ - __builtin_ia32_clflush(event); - __builtin_ia32_mfence(); - - } - - return event->semaphore; -} - -VkResult anv_SetEvent( - VkDevice _device, - VkEvent _event) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_event, event, _event); - - event->semaphore = VK_EVENT_SET; - - if (!device->info.has_llc) { - /* Make sure the writes we're flushing have landed. */ - __builtin_ia32_mfence(); - __builtin_ia32_clflush(event); - } - - return VK_SUCCESS; -} - -VkResult anv_ResetEvent( - VkDevice _device, - VkEvent _event) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_event, event, _event); - - event->semaphore = VK_EVENT_RESET; - - if (!device->info.has_llc) { - /* Make sure the writes we're flushing have landed. */ - __builtin_ia32_mfence(); - __builtin_ia32_clflush(event); - } - - return VK_SUCCESS; -} - -// Buffer functions - -VkResult anv_CreateBuffer( - VkDevice _device, - const VkBufferCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkBuffer* pBuffer) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_buffer *buffer; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO); - - buffer = anv_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (buffer == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - buffer->size = pCreateInfo->size; - buffer->usage = pCreateInfo->usage; - buffer->bo = NULL; - buffer->offset = 0; - - *pBuffer = anv_buffer_to_handle(buffer); - - return VK_SUCCESS; -} - -void anv_DestroyBuffer( - VkDevice _device, - VkBuffer _buffer, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - - anv_free2(&device->alloc, pAllocator, buffer); -} - -void -anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state, - enum isl_format format, - uint32_t offset, uint32_t range, uint32_t stride) -{ - switch (device->info.gen) { - case 7: - if (device->info.is_haswell) - gen75_fill_buffer_surface_state(state.map, format, offset, range, - stride); - else - gen7_fill_buffer_surface_state(state.map, format, offset, range, - stride); - break; - case 8: - gen8_fill_buffer_surface_state(state.map, format, offset, range, stride); - break; - case 9: - gen9_fill_buffer_surface_state(state.map, format, offset, range, stride); - break; - default: - unreachable("unsupported gen\n"); - } - - if (!device->info.has_llc) - anv_state_clflush(state); -} - -void anv_DestroySampler( - VkDevice _device, - VkSampler _sampler, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_sampler, sampler, _sampler); - - anv_free2(&device->alloc, pAllocator, sampler); -} - -VkResult anv_CreateFramebuffer( - VkDevice _device, - const 
VkFramebufferCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkFramebuffer* pFramebuffer) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_framebuffer *framebuffer; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO); - - size_t size = sizeof(*framebuffer) + - sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount; - framebuffer = anv_alloc2(&device->alloc, pAllocator, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (framebuffer == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - framebuffer->attachment_count = pCreateInfo->attachmentCount; - for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { - VkImageView _iview = pCreateInfo->pAttachments[i]; - framebuffer->attachments[i] = anv_image_view_from_handle(_iview); - } - - framebuffer->width = pCreateInfo->width; - framebuffer->height = pCreateInfo->height; - framebuffer->layers = pCreateInfo->layers; - - *pFramebuffer = anv_framebuffer_to_handle(framebuffer); - - return VK_SUCCESS; -} - -void anv_DestroyFramebuffer( - VkDevice _device, - VkFramebuffer _fb, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_framebuffer, fb, _fb); - - anv_free2(&device->alloc, pAllocator, fb); -} - -void vkCmdDbgMarkerBegin( - VkCommandBuffer commandBuffer, - const char* pMarker) - __attribute__ ((visibility ("default"))); - -void vkCmdDbgMarkerEnd( - VkCommandBuffer commandBuffer) - __attribute__ ((visibility ("default"))); - -void vkCmdDbgMarkerBegin( - VkCommandBuffer commandBuffer, - const char* pMarker) -{ -} - -void vkCmdDbgMarkerEnd( - VkCommandBuffer commandBuffer) -{ -} diff --git a/src/vulkan/anv_dump.c b/src/vulkan/anv_dump.c deleted file mode 100644 index b7fa28b..0000000 --- a/src/vulkan/anv_dump.c +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "anv_private.h" - -/* This file contains utility functions for help debugging. They can be - * called from GDB or similar to help inspect images and buffers. 
- */ - -void -anv_dump_image_to_ppm(struct anv_device *device, - struct anv_image *image, unsigned miplevel, - unsigned array_layer, const char *filename) -{ - VkDevice vk_device = anv_device_to_handle(device); - VkResult result; - - VkExtent2D extent = { image->extent.width, image->extent.height }; - for (unsigned i = 0; i < miplevel; i++) { - extent.width = MAX2(1, extent.width / 2); - extent.height = MAX2(1, extent.height / 2); - } - - VkImage copy_image; - result = anv_CreateImage(vk_device, - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = VK_FORMAT_R8G8B8A8_UNORM, - .extent = (VkExtent3D) { extent.width, extent.height, 1 }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT, - .flags = 0, - }, NULL, ©_image); - assert(result == VK_SUCCESS); - - VkMemoryRequirements reqs; - anv_GetImageMemoryRequirements(vk_device, copy_image, &reqs); - - VkDeviceMemory memory; - result = anv_AllocateMemory(vk_device, - &(VkMemoryAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .allocationSize = reqs.size, - .memoryTypeIndex = 0, - }, NULL, &memory); - assert(result == VK_SUCCESS); - - result = anv_BindImageMemory(vk_device, copy_image, memory, 0); - assert(result == VK_SUCCESS); - - VkCommandPool commandPool; - result = anv_CreateCommandPool(vk_device, - &(VkCommandPoolCreateInfo) { - .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, - .queueFamilyIndex = 0, - .flags = 0, - }, NULL, &commandPool); - assert(result == VK_SUCCESS); - - VkCommandBuffer cmd; - result = anv_AllocateCommandBuffers(vk_device, - &(VkCommandBufferAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, - .commandPool = commandPool, - .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, - .commandBufferCount = 1, - }, &cmd); - assert(result == VK_SUCCESS); - - result = anv_BeginCommandBuffer(cmd, - &(VkCommandBufferBeginInfo) { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, - }); - assert(result == VK_SUCCESS); - - anv_CmdBlitImage(cmd, - anv_image_to_handle(image), VK_IMAGE_LAYOUT_GENERAL, - copy_image, VK_IMAGE_LAYOUT_GENERAL, 1, - &(VkImageBlit) { - .srcSubresource = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .mipLevel = miplevel, - .baseArrayLayer = array_layer, - .layerCount = 1, - }, - .srcOffsets = { - { 0, 0, 0 }, - { extent.width, extent.height, 1 }, - }, - .dstSubresource = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1, - }, - .dstOffsets = { - { 0, 0, 0 }, - { extent.width, extent.height, 1 }, - }, - }, VK_FILTER_NEAREST); - - ANV_CALL(CmdPipelineBarrier)(cmd, - VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, - true, 0, NULL, 0, NULL, 1, - &(VkImageMemoryBarrier) { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_HOST_READ_BIT, - .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = 0, - .dstQueueFamilyIndex = 0, - .image = copy_image, - .subresourceRange = (VkImageSubresourceRange) { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }); - - result = anv_EndCommandBuffer(cmd); - assert(result == VK_SUCCESS); - - VkFence fence; - result = anv_CreateFence(vk_device, - &(VkFenceCreateInfo) { - 
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, - .flags = 0, - }, NULL, &fence); - assert(result == VK_SUCCESS); - - result = anv_QueueSubmit(anv_queue_to_handle(&device->queue), 1, - &(VkSubmitInfo) { - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .commandBufferCount = 1, - .pCommandBuffers = &cmd, - }, fence); - assert(result == VK_SUCCESS); - - result = anv_WaitForFences(vk_device, 1, &fence, true, UINT64_MAX); - assert(result == VK_SUCCESS); - - anv_DestroyFence(vk_device, fence, NULL); - anv_DestroyCommandPool(vk_device, commandPool, NULL); - - uint8_t *map; - result = anv_MapMemory(vk_device, memory, 0, reqs.size, 0, (void **)&map); - assert(result == VK_SUCCESS); - - VkSubresourceLayout layout; - anv_GetImageSubresourceLayout(vk_device, copy_image, - &(VkImageSubresource) { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .mipLevel = 0, - .arrayLayer = 0, - }, &layout); - - map += layout.offset; - - /* Now we can finally write the PPM file */ - FILE *file = fopen(filename, "wb"); - assert(file); - - fprintf(file, "P6\n%d %d\n255\n", extent.width, extent.height); - for (unsigned y = 0; y < extent.height; y++) { - uint8_t row[extent.width * 3]; - for (unsigned x = 0; x < extent.width; x++) { - row[x * 3 + 0] = map[x * 4 + 0]; - row[x * 3 + 1] = map[x * 4 + 1]; - row[x * 3 + 2] = map[x * 4 + 2]; - } - fwrite(row, 3, extent.width, file); - - map += layout.rowPitch; - } - fclose(file); - - anv_UnmapMemory(vk_device, memory); - anv_DestroyImage(vk_device, copy_image, NULL); - anv_FreeMemory(vk_device, memory, NULL); -} diff --git a/src/vulkan/anv_entrypoints_gen.py b/src/vulkan/anv_entrypoints_gen.py deleted file mode 100644 index 1e4cfcb..0000000 --- a/src/vulkan/anv_entrypoints_gen.py +++ /dev/null @@ -1,324 +0,0 @@ -# coding=utf-8 -# -# Copyright © 2015 Intel Corporation -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. -# - -import fileinput, re, sys - -# Each function typedef in the vulkan.h header is all on one line and matches -# this regepx. We hope that won't change. - -p = re.compile('typedef ([^ ]*) *\((?:VKAPI_PTR)? *\*PFN_vk([^(]*)\)(.*);') - -entrypoints = [] - -# We generate a static hash table for entry point lookup -# (vkGetProcAddress). We use a linear congruential generator for our hash -# function and a power-of-two size table. The prime numbers are determined -# experimentally. 
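The comment above only names the scheme, so here is a minimal sketch, in plain C and outside the patch itself, of the hash and probe it describes. The constants are the ones the script defines just below (prime_factor = 5024183, prime_step = 19, a 256-entry table); the function names are illustrative only.

   #include <stdint.h>

   /* Per-character linear congruential hash over the entry point name;
    * unsigned overflow gives the same wrap-around as the script's
    * "& u32_mask". */
   static uint32_t
   entrypoint_hash(const char *name)
   {
      uint32_t h = 0;
      for (const char *p = name; *p; p++)
         h = h * 5024183u + (unsigned char)*p;
      return h;
   }

   /* The table size is a power of two, so "modulo table size" is just a
    * mask; a collision is resolved by stepping the hash by the odd prime
    * step and masking again, matching the generated lookup loop. */
   static unsigned
   probe_slot(uint32_t h, unsigned attempt)
   {
      return (h + attempt * 19u) & (256u - 1);
   }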
- -none = 0xffff -hash_size = 256 -u32_mask = 2**32 - 1 -hash_mask = hash_size - 1 - -prime_factor = 5024183 -prime_step = 19 - -def hash(name): - h = 0; - for c in name: - h = (h * prime_factor + ord(c)) & u32_mask - - return h - -opt_header = False -opt_code = False - -if (sys.argv[1] == "header"): - opt_header = True - sys.argv.pop() -elif (sys.argv[1] == "code"): - opt_code = True - sys.argv.pop() - -# Parse the entry points in the header - -i = 0 -for line in fileinput.input(): - m = p.match(line) - if (m): - if m.group(2) == 'VoidFunction': - continue - fullname = "vk" + m.group(2) - h = hash(fullname) - entrypoints.append((m.group(1), m.group(2), m.group(3), i, h)) - i = i + 1 - -# For outputting entrypoints.h we generate a anv_EntryPoint() prototype -# per entry point. - -if opt_header: - print "/* This file generated from vk_gen.py, don't edit directly. */\n" - - print "struct anv_dispatch_table {" - print " union {" - print " void *entrypoints[%d];" % len(entrypoints) - print " struct {" - - for type, name, args, num, h in entrypoints: - print " %s (*%s)%s;" % (type, name, args) - print " };\n" - print " };\n" - print "};\n" - - print "void anv_set_dispatch_devinfo(const struct brw_device_info *info);\n" - - for type, name, args, num, h in entrypoints: - print "%s anv_%s%s;" % (type, name, args) - print "%s gen7_%s%s;" % (type, name, args) - print "%s gen75_%s%s;" % (type, name, args) - print "%s gen8_%s%s;" % (type, name, args) - print "%s gen9_%s%s;" % (type, name, args) - print "%s anv_validate_%s%s;" % (type, name, args) - exit() - - - -print """/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/* DO NOT EDIT! This is a generated file. */ - -#include "anv_private.h" - -struct anv_entrypoint { - uint32_t name; - uint32_t hash; -}; - -/* We use a big string constant to avoid lots of reloctions from the entry - * point table to lots of little strings. The entries in the entry point table - * store the index into this big string. - */ - -static const char strings[] =""" - -offsets = [] -i = 0; -for type, name, args, num, h in entrypoints: - print " \"vk%s\\0\"" % name - offsets.append(i) - i += 2 + len(name) + 1 -print """ ; - -/* Weak aliases for all potential validate functions. These will resolve to - * NULL if they're not defined, which lets the resolve_entrypoint() function - * either pick a validate wrapper if available or just plug in the actual - * entry point. 
- */ -""" - -# Now generate the table of all entry points and their validation functions - -print "\nstatic const struct anv_entrypoint entrypoints[] = {" -for type, name, args, num, h in entrypoints: - print " { %5d, 0x%08x }," % (offsets[num], h) -print "};\n" - -for layer in [ "anv", "validate", "gen7", "gen75", "gen8", "gen9" ]: - for type, name, args, num, h in entrypoints: - print "%s %s_%s%s __attribute__ ((weak));" % (type, layer, name, args) - print "\nconst struct anv_dispatch_table %s_layer = {" % layer - for type, name, args, num, h in entrypoints: - print " .%s = %s_%s," % (name, layer, name) - print "};\n" - -print """ -#ifdef DEBUG -static bool enable_validate = true; -#else -static bool enable_validate = false; -#endif - -/* We can't use symbols that need resolving (like, oh, getenv) in the resolve - * function. This means that we have to determine whether or not to use the - * validation layer sometime before that. The constructor function attribute asks - * the dynamic linker to invoke determine_validate() at dlopen() time which - * works. - */ -static void __attribute__ ((constructor)) -determine_validate(void) -{ - const char *s = getenv("ANV_VALIDATE"); - - if (s) - enable_validate = atoi(s); -} - -static const struct brw_device_info *dispatch_devinfo; - -void -anv_set_dispatch_devinfo(const struct brw_device_info *devinfo) -{ - dispatch_devinfo = devinfo; -} - -void * __attribute__ ((noinline)) -anv_resolve_entrypoint(uint32_t index) -{ - if (enable_validate && validate_layer.entrypoints[index]) - return validate_layer.entrypoints[index]; - - if (dispatch_devinfo == NULL) { - assert(anv_layer.entrypoints[index]); - return anv_layer.entrypoints[index]; - } - - switch (dispatch_devinfo->gen) { - case 9: - if (gen9_layer.entrypoints[index]) - return gen9_layer.entrypoints[index]; - /* fall through */ - case 8: - if (gen8_layer.entrypoints[index]) - return gen8_layer.entrypoints[index]; - /* fall through */ - case 7: - if (dispatch_devinfo->is_haswell && gen75_layer.entrypoints[index]) - return gen75_layer.entrypoints[index]; - - if (gen7_layer.entrypoints[index]) - return gen7_layer.entrypoints[index]; - /* fall through */ - case 0: - return anv_layer.entrypoints[index]; - default: - unreachable("unsupported gen\\n"); - } -} -""" - -# Now output ifuncs and their resolve helpers for all entry points. The -# resolve helper calls resolve_entrypoint() with the entry point index, which -# lets the resolver look it up in the table. - -for type, name, args, num, h in entrypoints: - print "static void *resolve_%s(void) { return anv_resolve_entrypoint(%d); }" % (name, num) - print "%s vk%s%s\n __attribute__ ((ifunc (\"resolve_%s\"), visibility (\"default\")));\n" % (type, name, args, name) - - -# Now generate the hash table used for entry point look up. This is a -# uint16_t table of entry point indices. We use 0xffff to indicate an entry -# in the hash table is empty. 
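Before the table-generation code that follows, it may help to see the shape of what the two print loops above emit for a single entry point. This is a hedged sketch, not text from the patch: the signature is simplified to void, the entry point name is only an example, and the hard-coded fallback stands in for the real resolver's call to anv_resolve_entrypoint() with the entry point's index.

   /* Every layer's implementation is declared weak, so a per-gen function
    * that was never compiled resolves to a null pointer and the dispatcher
    * can fall back to the generic anv_ version. */
   void gen8_CreateSampler(void) __attribute__((weak));
   void anv_CreateSampler(void) __attribute__((weak));

   /* The public symbol is an ifunc: the dynamic linker calls the resolver
    * once and binds vkCreateSampler directly to whatever it returns. */
   static void *
   resolve_CreateSampler(void)
   {
      return gen8_CreateSampler ? (void *)gen8_CreateSampler
                                : (void *)anv_CreateSampler;
   }

   void vkCreateSampler(void)
      __attribute__((ifunc("resolve_CreateSampler"), visibility("default")));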
- -map = [none for f in xrange(hash_size)] -collisions = [0 for f in xrange(10)] -for type, name, args, num, h in entrypoints: - level = 0 - while map[h & hash_mask] != none: - h = h + prime_step - level = level + 1 - if level > 9: - collisions[9] += 1 - else: - collisions[level] += 1 - map[h & hash_mask] = num - -print "/* Hash table stats:" -print " * size %d entries" % hash_size -print " * collisions entries" -for i in xrange(10): - if (i == 9): - plus = "+" - else: - plus = " " - - print " * %2d%s %4d" % (i, plus, collisions[i]) -print " */\n" - -print "#define none 0x%04x\n" % none - -print "static const uint16_t map[] = {" -for i in xrange(0, hash_size, 8): - print " ", - for j in xrange(i, i + 8): - if map[j] & 0xffff == 0xffff: - print " none,", - else: - print "0x%04x," % (map[j] & 0xffff), - print - -print "};" - -# Finally we generate the hash table lookup function. The hash function and -# linear probing algorithm matches the hash table generated above. - -print """ -void * -anv_lookup_entrypoint(const char *name) -{ - static const uint32_t prime_factor = %d; - static const uint32_t prime_step = %d; - const struct anv_entrypoint *e; - uint32_t hash, h, i; - const char *p; - - hash = 0; - for (p = name; *p; p++) - hash = hash * prime_factor + *p; - - h = hash; - do { - i = map[h & %d]; - if (i == none) - return NULL; - e = &entrypoints[i]; - h += prime_step; - } while (e->hash != hash); - - if (strcmp(name, strings + e->name) != 0) - return NULL; - - return anv_resolve_entrypoint(i); -} -""" % (prime_factor, prime_step, hash_mask) diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c deleted file mode 100644 index 7798a7b..0000000 --- a/src/vulkan/anv_formats.c +++ /dev/null @@ -1,603 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "anv_private.h" -#include "brw_surface_formats.h" - -#define RGBA ((struct anv_format_swizzle) { 0, 1, 2, 3 }) -#define BGRA ((struct anv_format_swizzle) { 2, 1, 0, 3 }) - -#define swiz_fmt(__vk_fmt, __hw_fmt, __swizzle, ...) \ - [__vk_fmt] = { \ - .vk_format = __vk_fmt, \ - .name = #__vk_fmt, \ - .isl_format = __hw_fmt, \ - .isl_layout = &isl_format_layouts[__hw_fmt], \ - .swizzle = __swizzle, \ - __VA_ARGS__ \ - } - -#define fmt(__vk_fmt, __hw_fmt, ...) \ - swiz_fmt(__vk_fmt, __hw_fmt, RGBA, __VA_ARGS__) - -/* HINT: For array formats, the ISL name should match the VK name. 
For - * packed formats, they should have the channels in reverse order from each - * other. The reason for this is that, for packed formats, the ISL (and - * bspec) names are in LSB -> MSB order while VK formats are MSB -> LSB. - */ -static const struct anv_format anv_formats[] = { - fmt(VK_FORMAT_UNDEFINED, ISL_FORMAT_RAW), - fmt(VK_FORMAT_R4G4_UNORM_PACK8, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_R4G4B4A4_UNORM_PACK16, ISL_FORMAT_A4B4G4R4_UNORM), - swiz_fmt(VK_FORMAT_B4G4R4A4_UNORM_PACK16, ISL_FORMAT_A4B4G4R4_UNORM, BGRA), - fmt(VK_FORMAT_R5G6B5_UNORM_PACK16, ISL_FORMAT_B5G6R5_UNORM), - swiz_fmt(VK_FORMAT_B5G6R5_UNORM_PACK16, ISL_FORMAT_B5G6R5_UNORM, BGRA), - fmt(VK_FORMAT_R5G5B5A1_UNORM_PACK16, ISL_FORMAT_A1B5G5R5_UNORM), - fmt(VK_FORMAT_B5G5R5A1_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_A1R5G5B5_UNORM_PACK16, ISL_FORMAT_B5G5R5A1_UNORM), - fmt(VK_FORMAT_R8_UNORM, ISL_FORMAT_R8_UNORM), - fmt(VK_FORMAT_R8_SNORM, ISL_FORMAT_R8_SNORM), - fmt(VK_FORMAT_R8_USCALED, ISL_FORMAT_R8_USCALED), - fmt(VK_FORMAT_R8_SSCALED, ISL_FORMAT_R8_SSCALED), - fmt(VK_FORMAT_R8_UINT, ISL_FORMAT_R8_UINT), - fmt(VK_FORMAT_R8_SINT, ISL_FORMAT_R8_SINT), - fmt(VK_FORMAT_R8_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_R8G8_UNORM, ISL_FORMAT_R8G8_UNORM), - fmt(VK_FORMAT_R8G8_SNORM, ISL_FORMAT_R8G8_SNORM), - fmt(VK_FORMAT_R8G8_USCALED, ISL_FORMAT_R8G8_USCALED), - fmt(VK_FORMAT_R8G8_SSCALED, ISL_FORMAT_R8G8_SSCALED), - fmt(VK_FORMAT_R8G8_UINT, ISL_FORMAT_R8G8_UINT), - fmt(VK_FORMAT_R8G8_SINT, ISL_FORMAT_R8G8_SINT), - fmt(VK_FORMAT_R8G8_SRGB, ISL_FORMAT_UNSUPPORTED), /* L8A8_UNORM_SRGB */ - fmt(VK_FORMAT_R8G8B8_UNORM, ISL_FORMAT_R8G8B8_UNORM), - fmt(VK_FORMAT_R8G8B8_SNORM, ISL_FORMAT_R8G8B8_SNORM), - fmt(VK_FORMAT_R8G8B8_USCALED, ISL_FORMAT_R8G8B8_USCALED), - fmt(VK_FORMAT_R8G8B8_SSCALED, ISL_FORMAT_R8G8B8_SSCALED), - fmt(VK_FORMAT_R8G8B8_UINT, ISL_FORMAT_R8G8B8_UINT), - fmt(VK_FORMAT_R8G8B8_SINT, ISL_FORMAT_R8G8B8_SINT), - fmt(VK_FORMAT_R8G8B8_SRGB, ISL_FORMAT_UNSUPPORTED), /* B8G8R8A8_UNORM_SRGB */ - fmt(VK_FORMAT_R8G8B8A8_UNORM, ISL_FORMAT_R8G8B8A8_UNORM), - fmt(VK_FORMAT_R8G8B8A8_SNORM, ISL_FORMAT_R8G8B8A8_SNORM), - fmt(VK_FORMAT_R8G8B8A8_USCALED, ISL_FORMAT_R8G8B8A8_USCALED), - fmt(VK_FORMAT_R8G8B8A8_SSCALED, ISL_FORMAT_R8G8B8A8_SSCALED), - fmt(VK_FORMAT_R8G8B8A8_UINT, ISL_FORMAT_R8G8B8A8_UINT), - fmt(VK_FORMAT_R8G8B8A8_SINT, ISL_FORMAT_R8G8B8A8_SINT), - fmt(VK_FORMAT_R8G8B8A8_SRGB, ISL_FORMAT_R8G8B8A8_UNORM_SRGB), - fmt(VK_FORMAT_A8B8G8R8_UNORM_PACK32, ISL_FORMAT_R8G8B8A8_UNORM), - fmt(VK_FORMAT_A8B8G8R8_SNORM_PACK32, ISL_FORMAT_R8G8B8A8_SNORM), - fmt(VK_FORMAT_A8B8G8R8_USCALED_PACK32, ISL_FORMAT_R8G8B8A8_USCALED), - fmt(VK_FORMAT_A8B8G8R8_SSCALED_PACK32, ISL_FORMAT_R8G8B8A8_SSCALED), - fmt(VK_FORMAT_A8B8G8R8_UINT_PACK32, ISL_FORMAT_R8G8B8A8_UINT), - fmt(VK_FORMAT_A8B8G8R8_SINT_PACK32, ISL_FORMAT_R8G8B8A8_SINT), - fmt(VK_FORMAT_A8B8G8R8_SRGB_PACK32, ISL_FORMAT_R8G8B8A8_UNORM_SRGB), - fmt(VK_FORMAT_A2R10G10B10_UNORM_PACK32, ISL_FORMAT_B10G10R10A2_UNORM), - fmt(VK_FORMAT_A2R10G10B10_SNORM_PACK32, ISL_FORMAT_B10G10R10A2_SNORM), - fmt(VK_FORMAT_A2R10G10B10_USCALED_PACK32, ISL_FORMAT_B10G10R10A2_USCALED), - fmt(VK_FORMAT_A2R10G10B10_SSCALED_PACK32, ISL_FORMAT_B10G10R10A2_SSCALED), - fmt(VK_FORMAT_A2R10G10B10_UINT_PACK32, ISL_FORMAT_B10G10R10A2_UINT), - fmt(VK_FORMAT_A2R10G10B10_SINT_PACK32, ISL_FORMAT_B10G10R10A2_SINT), - fmt(VK_FORMAT_A2B10G10R10_UNORM_PACK32, ISL_FORMAT_R10G10B10A2_UNORM), - fmt(VK_FORMAT_A2B10G10R10_SNORM_PACK32, ISL_FORMAT_R10G10B10A2_SNORM), - fmt(VK_FORMAT_A2B10G10R10_USCALED_PACK32, 
ISL_FORMAT_R10G10B10A2_USCALED), - fmt(VK_FORMAT_A2B10G10R10_SSCALED_PACK32, ISL_FORMAT_R10G10B10A2_SSCALED), - fmt(VK_FORMAT_A2B10G10R10_UINT_PACK32, ISL_FORMAT_R10G10B10A2_UINT), - fmt(VK_FORMAT_A2B10G10R10_SINT_PACK32, ISL_FORMAT_R10G10B10A2_SINT), - fmt(VK_FORMAT_R16_UNORM, ISL_FORMAT_R16_UNORM), - fmt(VK_FORMAT_R16_SNORM, ISL_FORMAT_R16_SNORM), - fmt(VK_FORMAT_R16_USCALED, ISL_FORMAT_R16_USCALED), - fmt(VK_FORMAT_R16_SSCALED, ISL_FORMAT_R16_SSCALED), - fmt(VK_FORMAT_R16_UINT, ISL_FORMAT_R16_UINT), - fmt(VK_FORMAT_R16_SINT, ISL_FORMAT_R16_SINT), - fmt(VK_FORMAT_R16_SFLOAT, ISL_FORMAT_R16_FLOAT), - fmt(VK_FORMAT_R16G16_UNORM, ISL_FORMAT_R16G16_UNORM), - fmt(VK_FORMAT_R16G16_SNORM, ISL_FORMAT_R16G16_SNORM), - fmt(VK_FORMAT_R16G16_USCALED, ISL_FORMAT_R16G16_USCALED), - fmt(VK_FORMAT_R16G16_SSCALED, ISL_FORMAT_R16G16_SSCALED), - fmt(VK_FORMAT_R16G16_UINT, ISL_FORMAT_R16G16_UINT), - fmt(VK_FORMAT_R16G16_SINT, ISL_FORMAT_R16G16_SINT), - fmt(VK_FORMAT_R16G16_SFLOAT, ISL_FORMAT_R16G16_FLOAT), - fmt(VK_FORMAT_R16G16B16_UNORM, ISL_FORMAT_R16G16B16_UNORM), - fmt(VK_FORMAT_R16G16B16_SNORM, ISL_FORMAT_R16G16B16_SNORM), - fmt(VK_FORMAT_R16G16B16_USCALED, ISL_FORMAT_R16G16B16_USCALED), - fmt(VK_FORMAT_R16G16B16_SSCALED, ISL_FORMAT_R16G16B16_SSCALED), - fmt(VK_FORMAT_R16G16B16_UINT, ISL_FORMAT_R16G16B16_UINT), - fmt(VK_FORMAT_R16G16B16_SINT, ISL_FORMAT_R16G16B16_SINT), - fmt(VK_FORMAT_R16G16B16_SFLOAT, ISL_FORMAT_R16G16B16_FLOAT), - fmt(VK_FORMAT_R16G16B16A16_UNORM, ISL_FORMAT_R16G16B16A16_UNORM), - fmt(VK_FORMAT_R16G16B16A16_SNORM, ISL_FORMAT_R16G16B16A16_SNORM), - fmt(VK_FORMAT_R16G16B16A16_USCALED, ISL_FORMAT_R16G16B16A16_USCALED), - fmt(VK_FORMAT_R16G16B16A16_SSCALED, ISL_FORMAT_R16G16B16A16_SSCALED), - fmt(VK_FORMAT_R16G16B16A16_UINT, ISL_FORMAT_R16G16B16A16_UINT), - fmt(VK_FORMAT_R16G16B16A16_SINT, ISL_FORMAT_R16G16B16A16_SINT), - fmt(VK_FORMAT_R16G16B16A16_SFLOAT, ISL_FORMAT_R16G16B16A16_FLOAT), - fmt(VK_FORMAT_R32_UINT, ISL_FORMAT_R32_UINT,), - fmt(VK_FORMAT_R32_SINT, ISL_FORMAT_R32_SINT,), - fmt(VK_FORMAT_R32_SFLOAT, ISL_FORMAT_R32_FLOAT,), - fmt(VK_FORMAT_R32G32_UINT, ISL_FORMAT_R32G32_UINT,), - fmt(VK_FORMAT_R32G32_SINT, ISL_FORMAT_R32G32_SINT,), - fmt(VK_FORMAT_R32G32_SFLOAT, ISL_FORMAT_R32G32_FLOAT,), - fmt(VK_FORMAT_R32G32B32_UINT, ISL_FORMAT_R32G32B32_UINT,), - fmt(VK_FORMAT_R32G32B32_SINT, ISL_FORMAT_R32G32B32_SINT,), - fmt(VK_FORMAT_R32G32B32_SFLOAT, ISL_FORMAT_R32G32B32_FLOAT,), - fmt(VK_FORMAT_R32G32B32A32_UINT, ISL_FORMAT_R32G32B32A32_UINT,), - fmt(VK_FORMAT_R32G32B32A32_SINT, ISL_FORMAT_R32G32B32A32_SINT,), - fmt(VK_FORMAT_R32G32B32A32_SFLOAT, ISL_FORMAT_R32G32B32A32_FLOAT,), - fmt(VK_FORMAT_R64_UINT, ISL_FORMAT_R64_PASSTHRU), - fmt(VK_FORMAT_R64_SINT, ISL_FORMAT_R64_PASSTHRU), - fmt(VK_FORMAT_R64_SFLOAT, ISL_FORMAT_R64_FLOAT), - fmt(VK_FORMAT_R64G64_UINT, ISL_FORMAT_R64G64_PASSTHRU), - fmt(VK_FORMAT_R64G64_SINT, ISL_FORMAT_R64G64_PASSTHRU), - fmt(VK_FORMAT_R64G64_SFLOAT, ISL_FORMAT_R64G64_FLOAT), - fmt(VK_FORMAT_R64G64B64_UINT, ISL_FORMAT_R64G64B64_PASSTHRU), - fmt(VK_FORMAT_R64G64B64_SINT, ISL_FORMAT_R64G64B64_PASSTHRU), - fmt(VK_FORMAT_R64G64B64_SFLOAT, ISL_FORMAT_R64G64B64_FLOAT), - fmt(VK_FORMAT_R64G64B64A64_UINT, ISL_FORMAT_R64G64B64A64_PASSTHRU), - fmt(VK_FORMAT_R64G64B64A64_SINT, ISL_FORMAT_R64G64B64A64_PASSTHRU), - fmt(VK_FORMAT_R64G64B64A64_SFLOAT, ISL_FORMAT_R64G64B64A64_FLOAT), - fmt(VK_FORMAT_B10G11R11_UFLOAT_PACK32, ISL_FORMAT_R11G11B10_FLOAT), - fmt(VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, ISL_FORMAT_R9G9B9E5_SHAREDEXP), - - fmt(VK_FORMAT_D16_UNORM, ISL_FORMAT_R16_UNORM, 
.has_depth = true), - fmt(VK_FORMAT_X8_D24_UNORM_PACK32, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .has_depth = true), - fmt(VK_FORMAT_D32_SFLOAT, ISL_FORMAT_R32_FLOAT, .has_depth = true), - fmt(VK_FORMAT_S8_UINT, ISL_FORMAT_R8_UINT, .has_stencil = true), - fmt(VK_FORMAT_D16_UNORM_S8_UINT, ISL_FORMAT_R16_UNORM, .has_depth = true, .has_stencil = true), - fmt(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .has_depth = true, .has_stencil = true), - fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT, .has_depth = true, .has_stencil = true), - - fmt(VK_FORMAT_BC1_RGB_UNORM_BLOCK, ISL_FORMAT_DXT1_RGB), - fmt(VK_FORMAT_BC1_RGB_SRGB_BLOCK, ISL_FORMAT_DXT1_RGB_SRGB), - fmt(VK_FORMAT_BC1_RGBA_UNORM_BLOCK, ISL_FORMAT_BC1_UNORM), - fmt(VK_FORMAT_BC1_RGBA_SRGB_BLOCK, ISL_FORMAT_BC1_UNORM_SRGB), - fmt(VK_FORMAT_BC2_UNORM_BLOCK, ISL_FORMAT_BC2_UNORM), - fmt(VK_FORMAT_BC2_SRGB_BLOCK, ISL_FORMAT_BC2_UNORM_SRGB), - fmt(VK_FORMAT_BC3_UNORM_BLOCK, ISL_FORMAT_BC3_UNORM), - fmt(VK_FORMAT_BC3_SRGB_BLOCK, ISL_FORMAT_BC3_UNORM_SRGB), - fmt(VK_FORMAT_BC4_UNORM_BLOCK, ISL_FORMAT_BC4_UNORM), - fmt(VK_FORMAT_BC4_SNORM_BLOCK, ISL_FORMAT_BC4_SNORM), - fmt(VK_FORMAT_BC5_UNORM_BLOCK, ISL_FORMAT_BC5_UNORM), - fmt(VK_FORMAT_BC5_SNORM_BLOCK, ISL_FORMAT_BC5_SNORM), - fmt(VK_FORMAT_BC6H_UFLOAT_BLOCK, ISL_FORMAT_BC6H_UF16), - fmt(VK_FORMAT_BC6H_SFLOAT_BLOCK, ISL_FORMAT_BC6H_SF16), - fmt(VK_FORMAT_BC7_UNORM_BLOCK, ISL_FORMAT_BC7_UNORM), - fmt(VK_FORMAT_BC7_SRGB_BLOCK, ISL_FORMAT_BC7_UNORM_SRGB), - fmt(VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, ISL_FORMAT_ETC2_RGB8), - fmt(VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK, ISL_FORMAT_ETC2_SRGB8), - fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, ISL_FORMAT_ETC2_RGB8_PTA), - fmt(VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK, ISL_FORMAT_ETC2_SRGB8_PTA), - fmt(VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, ISL_FORMAT_ETC2_EAC_RGBA8), - fmt(VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK, ISL_FORMAT_ETC2_EAC_SRGB8_A8), - fmt(VK_FORMAT_EAC_R11_UNORM_BLOCK, ISL_FORMAT_EAC_R11), - fmt(VK_FORMAT_EAC_R11_SNORM_BLOCK, ISL_FORMAT_EAC_SIGNED_R11), - fmt(VK_FORMAT_EAC_R11G11_UNORM_BLOCK, ISL_FORMAT_EAC_RG11), - fmt(VK_FORMAT_EAC_R11G11_SNORM_BLOCK, ISL_FORMAT_EAC_SIGNED_RG11), - fmt(VK_FORMAT_ASTC_4x4_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_4x4_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x4_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x4_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_5x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_6x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x8_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_8x8_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x8_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x8_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - 
fmt(VK_FORMAT_ASTC_10x10_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_10x10_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x10_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x10_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x12_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_ASTC_12x12_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_UNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_SNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_USCALED, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_SSCALED, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_UINT, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_SINT, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8_SRGB, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_UNORM, ISL_FORMAT_B8G8R8A8_UNORM), - fmt(VK_FORMAT_B8G8R8A8_SNORM, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_USCALED, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_SSCALED, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_UINT, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_SINT, ISL_FORMAT_UNSUPPORTED), - fmt(VK_FORMAT_B8G8R8A8_SRGB, ISL_FORMAT_B8G8R8A8_UNORM_SRGB), -}; - -#undef fmt - -const struct anv_format * -anv_format_for_vk_format(VkFormat format) -{ - return &anv_formats[format]; -} - -/** - * Exactly one bit must be set in \a aspect. - */ -enum isl_format -anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect, - VkImageTiling tiling, struct anv_format_swizzle *swizzle) -{ - const struct anv_format *anv_fmt = &anv_formats[format]; - - if (swizzle) - *swizzle = anv_fmt->swizzle; - - switch (aspect) { - case VK_IMAGE_ASPECT_COLOR_BIT: - if (anv_fmt->isl_format == ISL_FORMAT_UNSUPPORTED) { - return ISL_FORMAT_UNSUPPORTED; - } else if (tiling == VK_IMAGE_TILING_OPTIMAL && - !util_is_power_of_two(anv_fmt->isl_layout->bs)) { - /* Tiled formats *must* be power-of-two because we need up upload - * them with the render pipeline. For 3-channel formats, we fix - * this by switching them over to RGBX or RGBA formats under the - * hood. 
- */ - enum isl_format rgbx = isl_format_rgb_to_rgbx(anv_fmt->isl_format); - if (rgbx != ISL_FORMAT_UNSUPPORTED) - return rgbx; - else - return isl_format_rgb_to_rgba(anv_fmt->isl_format); - } else { - return anv_fmt->isl_format; - } - - case VK_IMAGE_ASPECT_DEPTH_BIT: - case (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT): - assert(anv_fmt->has_depth); - return anv_fmt->isl_format; - - case VK_IMAGE_ASPECT_STENCIL_BIT: - assert(anv_fmt->has_stencil); - return ISL_FORMAT_R8_UINT; - - default: - unreachable("bad VkImageAspect"); - return ISL_FORMAT_UNSUPPORTED; - } -} - -// Format capabilities - -void anv_validate_GetPhysicalDeviceFormatProperties( - VkPhysicalDevice physicalDevice, - VkFormat _format, - VkFormatProperties* pFormatProperties) -{ - const struct anv_format *format = anv_format_for_vk_format(_format); - fprintf(stderr, "vkGetFormatProperties(%s)\n", format->name); - anv_GetPhysicalDeviceFormatProperties(physicalDevice, _format, pFormatProperties); -} - -static VkFormatFeatureFlags -get_image_format_properties(int gen, enum isl_format base, - enum isl_format actual, - struct anv_format_swizzle swizzle) -{ - const struct brw_surface_format_info *info = &surface_formats[actual]; - - if (actual == ISL_FORMAT_UNSUPPORTED || !info->exists) - return 0; - - VkFormatFeatureFlags flags = 0; - if (info->sampling <= gen) { - flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | - VK_FORMAT_FEATURE_BLIT_SRC_BIT; - - if (info->filtering <= gen) - flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; - } - - /* We can render to swizzled formats. However, if the alpha channel is - * moved, then blending won't work correctly. The PRM tells us - * straight-up not to render to such a surface. - */ - if (info->render_target <= gen && swizzle.a == 3) { - flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | - VK_FORMAT_FEATURE_BLIT_DST_BIT; - } - - if (info->alpha_blend <= gen && swizzle.a == 3) - flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; - - /* Load/store is determined based on base format. This prevents RGB - * formats from showing up as load/store capable. 
- */ - if (isl_is_storage_image_format(base)) - flags |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; - - if (base == ISL_FORMAT_R32_SINT || base == ISL_FORMAT_R32_UINT) - flags |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT; - - return flags; -} - -static VkFormatFeatureFlags -get_buffer_format_properties(int gen, enum isl_format format) -{ - const struct brw_surface_format_info *info = &surface_formats[format]; - - if (format == ISL_FORMAT_UNSUPPORTED || !info->exists) - return 0; - - VkFormatFeatureFlags flags = 0; - if (info->sampling <= gen && !isl_format_is_compressed(format)) - flags |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; - - if (info->input_vb <= gen) - flags |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; - - if (isl_is_storage_image_format(format)) - flags |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT; - - if (format == ISL_FORMAT_R32_SINT || format == ISL_FORMAT_R32_UINT) - flags |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT; - - return flags; -} - -static void -anv_physical_device_get_format_properties(struct anv_physical_device *physical_device, - VkFormat format, - VkFormatProperties *out_properties) -{ - int gen = physical_device->info->gen * 10; - if (physical_device->info->is_haswell) - gen += 5; - - VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0; - if (anv_format_is_depth_or_stencil(&anv_formats[format])) { - tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; - if (physical_device->info->gen >= 8) { - tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; - tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT; - } - if (anv_formats[format].has_depth) { - tiled |= VK_FORMAT_FEATURE_BLIT_DST_BIT; - } - } else { - enum isl_format linear_fmt, tiled_fmt; - struct anv_format_swizzle linear_swizzle, tiled_swizzle; - linear_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT, - VK_IMAGE_TILING_LINEAR, &linear_swizzle); - tiled_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT, - VK_IMAGE_TILING_OPTIMAL, &tiled_swizzle); - - linear = get_image_format_properties(gen, linear_fmt, linear_fmt, - linear_swizzle); - tiled = get_image_format_properties(gen, linear_fmt, tiled_fmt, - tiled_swizzle); - buffer = get_buffer_format_properties(gen, linear_fmt); - - /* XXX: We handle 3-channel formats by switching them out for RGBX or - * RGBA formats behind-the-scenes. This works fine for textures - * because the upload process will fill in the extra channel. - * We could also support it for render targets, but it will take - * substantially more work and we have enough RGBX formats to handle - * what most clients will want. 
- */ - if (linear_fmt != ISL_FORMAT_UNSUPPORTED && - !util_is_power_of_two(isl_format_layouts[linear_fmt].bs) && - isl_format_rgb_to_rgbx(linear_fmt) == ISL_FORMAT_UNSUPPORTED) { - tiled &= ~VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT & - ~VK_FORMAT_FEATURE_BLIT_DST_BIT; - } - } - - out_properties->linearTilingFeatures = linear; - out_properties->optimalTilingFeatures = tiled; - out_properties->bufferFeatures = buffer; - - return; -} - - -void anv_GetPhysicalDeviceFormatProperties( - VkPhysicalDevice physicalDevice, - VkFormat format, - VkFormatProperties* pFormatProperties) -{ - ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); - - anv_physical_device_get_format_properties( - physical_device, - format, - pFormatProperties); -} - -VkResult anv_GetPhysicalDeviceImageFormatProperties( - VkPhysicalDevice physicalDevice, - VkFormat format, - VkImageType type, - VkImageTiling tiling, - VkImageUsageFlags usage, - VkImageCreateFlags createFlags, - VkImageFormatProperties* pImageFormatProperties) -{ - ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); - VkFormatProperties format_props; - VkFormatFeatureFlags format_feature_flags; - VkExtent3D maxExtent; - uint32_t maxMipLevels; - uint32_t maxArraySize; - VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT; - - anv_physical_device_get_format_properties(physical_device, format, - &format_props); - - /* Extract the VkFormatFeatureFlags that are relevant for the queried - * tiling. - */ - if (tiling == VK_IMAGE_TILING_LINEAR) { - format_feature_flags = format_props.linearTilingFeatures; - } else if (tiling == VK_IMAGE_TILING_OPTIMAL) { - format_feature_flags = format_props.optimalTilingFeatures; - } else { - unreachable("bad VkImageTiling"); - } - - switch (type) { - default: - unreachable("bad VkImageType"); - case VK_IMAGE_TYPE_1D: - maxExtent.width = 16384; - maxExtent.height = 1; - maxExtent.depth = 1; - maxMipLevels = 15; /* log2(maxWidth) + 1 */ - maxArraySize = 2048; - sampleCounts = VK_SAMPLE_COUNT_1_BIT; - break; - case VK_IMAGE_TYPE_2D: - /* FINISHME: Does this really differ for cube maps? The documentation - * for RENDER_SURFACE_STATE suggests so. - */ - maxExtent.width = 16384; - maxExtent.height = 16384; - maxExtent.depth = 1; - maxMipLevels = 15; /* log2(maxWidth) + 1 */ - maxArraySize = 2048; - break; - case VK_IMAGE_TYPE_3D: - maxExtent.width = 2048; - maxExtent.height = 2048; - maxExtent.depth = 2048; - maxMipLevels = 12; /* log2(maxWidth) + 1 */ - maxArraySize = 1; - break; - } - - if (tiling == VK_IMAGE_TILING_OPTIMAL && - type == VK_IMAGE_TYPE_2D && - (format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | - VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) && - !(createFlags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && - !(usage & VK_IMAGE_USAGE_STORAGE_BIT)) { - sampleCounts = isl_device_get_sample_counts(&physical_device->isl_dev); - } - - if (usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { - /* Meta implements transfers by sampling from the source image. */ - if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { - goto unsupported; - } - } - -#if 0 - if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { - if (anv_format_for_vk_format(format)->has_stencil) { - /* Not yet implemented because copying to a W-tiled surface is crazy - * hard. 
- */ - anv_finishme("support VK_IMAGE_USAGE_TRANSFER_DST_BIT for " - "stencil format"); - goto unsupported; - } - } -#endif - - if (usage & VK_IMAGE_USAGE_SAMPLED_BIT) { - if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { - goto unsupported; - } - } - - if (usage & VK_IMAGE_USAGE_STORAGE_BIT) { - if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) { - goto unsupported; - } - } - - if (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { - if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) { - goto unsupported; - } - } - - if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { - if (!(format_feature_flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) { - goto unsupported; - } - } - - if (usage & VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT) { - /* Nothing to check. */ - } - - if (usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) { - /* Ignore this flag because it was removed from the - * provisional_I_20150910 header. - */ - } - - *pImageFormatProperties = (VkImageFormatProperties) { - .maxExtent = maxExtent, - .maxMipLevels = maxMipLevels, - .maxArrayLayers = maxArraySize, - .sampleCounts = sampleCounts, - - /* FINISHME: Accurately calculate - * VkImageFormatProperties::maxResourceSize. - */ - .maxResourceSize = UINT32_MAX, - }; - - return VK_SUCCESS; - -unsupported: - *pImageFormatProperties = (VkImageFormatProperties) { - .maxExtent = { 0, 0, 0 }, - .maxMipLevels = 0, - .maxArrayLayers = 0, - .sampleCounts = 0, - .maxResourceSize = 0, - }; - - return VK_SUCCESS; -} - -void anv_GetPhysicalDeviceSparseImageFormatProperties( - VkPhysicalDevice physicalDevice, - VkFormat format, - VkImageType type, - uint32_t samples, - VkImageUsageFlags usage, - VkImageTiling tiling, - uint32_t* pNumProperties, - VkSparseImageFormatProperties* pProperties) -{ - /* Sparse images are not yet supported. */ - *pNumProperties = 0; -} diff --git a/src/vulkan/anv_gem.c b/src/vulkan/anv_gem.c deleted file mode 100644 index 0a7be35..0000000 --- a/src/vulkan/anv_gem.c +++ /dev/null @@ -1,358 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#define _DEFAULT_SOURCE - -#include -#include -#include -#include -#include -#include - -#include "anv_private.h" - -#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s))) - -static int -anv_ioctl(int fd, unsigned long request, void *arg) -{ - int ret; - - do { - ret = ioctl(fd, request, arg); - } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); - - return ret; -} - -/** - * Wrapper around DRM_IOCTL_I915_GEM_CREATE. - * - * Return gem handle, or 0 on failure. Gem handles are never 0. - */ -uint32_t -anv_gem_create(struct anv_device *device, size_t size) -{ - struct drm_i915_gem_create gem_create; - int ret; - - VG_CLEAR(gem_create); - gem_create.size = size; - - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create); - if (ret != 0) { - /* FIXME: What do we do if this fails? */ - return 0; - } - - return gem_create.handle; -} - -void -anv_gem_close(struct anv_device *device, uint32_t gem_handle) -{ - struct drm_gem_close close; - - VG_CLEAR(close); - close.handle = gem_handle; - anv_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &close); -} - -/** - * Wrapper around DRM_IOCTL_I915_GEM_MMAP. - */ -void* -anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, - uint64_t offset, uint64_t size, uint32_t flags) -{ - struct drm_i915_gem_mmap gem_mmap; - int ret; - - gem_mmap.handle = gem_handle; - VG_CLEAR(gem_mmap.pad); - gem_mmap.offset = offset; - gem_mmap.size = size; - VG_CLEAR(gem_mmap.addr_ptr); - gem_mmap.flags = flags; - - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap); - if (ret != 0) { - /* FIXME: Is NULL the right error return? Cf MAP_INVALID */ - return NULL; - } - - VG(VALGRIND_MALLOCLIKE_BLOCK(gem_mmap.addr_ptr, gem_mmap.size, 0, 1)); - return (void *)(uintptr_t) gem_mmap.addr_ptr; -} - -/* This is just a wrapper around munmap, but it also notifies valgrind that - * this map is no longer valid. Pair this with anv_gem_mmap(). - */ -void -anv_gem_munmap(void *p, uint64_t size) -{ - VG(VALGRIND_FREELIKE_BLOCK(p, 0)); - munmap(p, size); -} - -uint32_t -anv_gem_userptr(struct anv_device *device, void *mem, size_t size) -{ - struct drm_i915_gem_userptr userptr; - int ret; - - VG_CLEAR(userptr); - userptr.user_ptr = (__u64)((unsigned long) mem); - userptr.user_size = size; - userptr.flags = 0; - - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); - if (ret == -1) - return 0; - - return userptr.handle; -} - -int -anv_gem_set_caching(struct anv_device *device, - uint32_t gem_handle, uint32_t caching) -{ - struct drm_i915_gem_caching gem_caching; - - VG_CLEAR(gem_caching); - gem_caching.handle = gem_handle; - gem_caching.caching = caching; - - return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &gem_caching); -} - -int -anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, - uint32_t read_domains, uint32_t write_domain) -{ - struct drm_i915_gem_set_domain gem_set_domain; - - VG_CLEAR(gem_set_domain); - gem_set_domain.handle = gem_handle; - gem_set_domain.read_domains = read_domains; - gem_set_domain.write_domain = write_domain; - - return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &gem_set_domain); -} - -/** - * On error, \a timeout_ns holds the remaining time. 
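A hedged caller-side sketch of the contract stated in the comment above; the helper and handle names are illustrative and not part of the driver. Because the unused time is written back through the pointer on an error return, a caller can chain waits against one variable without stretching its overall deadline.

   /* Sketch only: wait on two BOs within a single time budget. */
   static bool
   wait_two_bos(struct anv_device *device,
                uint32_t bo_a, uint32_t bo_b, int64_t budget_ns)
   {
      /* On error anv_gem_wait() leaves the remaining time in budget_ns,
       * so the second wait spends only what the first one left behind. */
      if (anv_gem_wait(device, bo_a, &budget_ns) == -1 && budget_ns <= 0)
         return false;
      if (anv_gem_wait(device, bo_b, &budget_ns) == -1 && budget_ns <= 0)
         return false;
      return true;
   }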
- */ -int -anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns) -{ - struct drm_i915_gem_wait wait; - int ret; - - VG_CLEAR(wait); - wait.bo_handle = gem_handle; - wait.timeout_ns = *timeout_ns; - wait.flags = 0; - - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); - *timeout_ns = wait.timeout_ns; - - return ret; -} - -int -anv_gem_execbuffer(struct anv_device *device, - struct drm_i915_gem_execbuffer2 *execbuf) -{ - return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf); -} - -int -anv_gem_set_tiling(struct anv_device *device, - uint32_t gem_handle, uint32_t stride, uint32_t tiling) -{ - struct drm_i915_gem_set_tiling set_tiling; - int ret; - - /* set_tiling overwrites the input on the error path, so we have to open - * code anv_ioctl. - */ - - do { - VG_CLEAR(set_tiling); - set_tiling.handle = gem_handle; - set_tiling.tiling_mode = tiling; - set_tiling.stride = stride; - - ret = ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); - } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); - - return ret; -} - -int -anv_gem_get_param(int fd, uint32_t param) -{ - drm_i915_getparam_t gp; - int ret, tmp; - - VG_CLEAR(gp); - gp.param = param; - gp.value = &tmp; - ret = anv_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); - if (ret == 0) - return tmp; - - return 0; -} - -bool -anv_gem_get_bit6_swizzle(int fd, uint32_t tiling) -{ - struct drm_gem_close close; - int ret; - - struct drm_i915_gem_create gem_create; - VG_CLEAR(gem_create); - gem_create.size = 4096; - - if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create)) { - assert(!"Failed to create GEM BO"); - return false; - } - - bool swizzled = false; - - /* set_tiling overwrites the input on the error path, so we have to open - * code anv_ioctl. - */ - struct drm_i915_gem_set_tiling set_tiling; - do { - VG_CLEAR(set_tiling); - set_tiling.handle = gem_create.handle; - set_tiling.tiling_mode = tiling; - set_tiling.stride = tiling == I915_TILING_X ? 
512 : 128; - - ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); - } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); - - if (ret != 0) { - assert(!"Failed to set BO tiling"); - goto close_and_return; - } - - struct drm_i915_gem_get_tiling get_tiling; - VG_CLEAR(get_tiling); - get_tiling.handle = gem_create.handle; - - if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling)) { - assert(!"Failed to get BO tiling"); - goto close_and_return; - } - - swizzled = get_tiling.swizzle_mode != I915_BIT_6_SWIZZLE_NONE; - -close_and_return: - - VG_CLEAR(close); - close.handle = gem_create.handle; - anv_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close); - - return swizzled; -} - -int -anv_gem_create_context(struct anv_device *device) -{ - struct drm_i915_gem_context_create create; - int ret; - - VG_CLEAR(create); - - ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); - if (ret == -1) - return -1; - - return create.ctx_id; -} - -int -anv_gem_destroy_context(struct anv_device *device, int context) -{ - struct drm_i915_gem_context_destroy destroy; - - VG_CLEAR(destroy); - destroy.ctx_id = context; - - return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy); -} - -int -anv_gem_get_aperture(int fd, uint64_t *size) -{ - struct drm_i915_gem_get_aperture aperture; - int ret; - - VG_CLEAR(aperture); - ret = anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); - if (ret == -1) - return -1; - - *size = aperture.aper_available_size; - - return 0; -} - -int -anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle) -{ - struct drm_prime_handle args; - int ret; - - VG_CLEAR(args); - args.handle = gem_handle; - args.flags = DRM_CLOEXEC; - - ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); - if (ret == -1) - return -1; - - return args.fd; -} - -uint32_t -anv_gem_fd_to_handle(struct anv_device *device, int fd) -{ - struct drm_prime_handle args; - int ret; - - VG_CLEAR(args); - args.fd = fd; - - ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args); - if (ret == -1) - return 0; - - return args.handle; -} diff --git a/src/vulkan/anv_gem_stubs.c b/src/vulkan/anv_gem_stubs.c deleted file mode 100644 index 3204fef..0000000 --- a/src/vulkan/anv_gem_stubs.c +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#define _DEFAULT_SOURCE - -#include -#include -#include - -#include "anv_private.h" - -static inline int -memfd_create(const char *name, unsigned int flags) -{ - return syscall(SYS_memfd_create, name, flags); -} - -uint32_t -anv_gem_create(struct anv_device *device, size_t size) -{ - int fd = memfd_create("fake bo", MFD_CLOEXEC); - if (fd == -1) - return 0; - - assert(fd != 0); - - if (ftruncate(fd, size) == -1) - return 0; - - return fd; -} - -void -anv_gem_close(struct anv_device *device, uint32_t gem_handle) -{ - close(gem_handle); -} - -void* -anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, - uint64_t offset, uint64_t size, uint32_t flags) -{ - /* Ignore flags, as they're specific to I915_GEM_MMAP. */ - (void) flags; - - return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, - gem_handle, offset); -} - -/* This is just a wrapper around munmap, but it also notifies valgrind that - * this map is no longer valid. Pair this with anv_gem_mmap(). - */ -void -anv_gem_munmap(void *p, uint64_t size) -{ - munmap(p, size); -} - -uint32_t -anv_gem_userptr(struct anv_device *device, void *mem, size_t size) -{ - return -1; -} - -int -anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns) -{ - return 0; -} - -int -anv_gem_execbuffer(struct anv_device *device, - struct drm_i915_gem_execbuffer2 *execbuf) -{ - return 0; -} - -int -anv_gem_set_tiling(struct anv_device *device, - uint32_t gem_handle, uint32_t stride, uint32_t tiling) -{ - return 0; -} - -int -anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, - uint32_t caching) -{ - return 0; -} - -int -anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, - uint32_t read_domains, uint32_t write_domain) -{ - return 0; -} - -int -anv_gem_get_param(int fd, uint32_t param) -{ - unreachable("Unused"); -} - -bool -anv_gem_get_bit6_swizzle(int fd, uint32_t tiling) -{ - unreachable("Unused"); -} - -int -anv_gem_create_context(struct anv_device *device) -{ - unreachable("Unused"); -} - -int -anv_gem_destroy_context(struct anv_device *device, int context) -{ - unreachable("Unused"); -} - -int -anv_gem_get_aperture(int fd, uint64_t *size) -{ - unreachable("Unused"); -} - -int -anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle) -{ - unreachable("Unused"); -} - -uint32_t -anv_gem_fd_to_handle(struct anv_device *device, int fd) -{ - unreachable("Unused"); -} diff --git a/src/vulkan/anv_gen_macros.h b/src/vulkan/anv_gen_macros.h deleted file mode 100644 index ef2ecd5..0000000 --- a/src/vulkan/anv_gen_macros.h +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#pragma once - -/* Macros for handling per-gen compilation. - * - * The prefixing macros GENX() and genX() automatically prefix whatever you - * give them by GENX_ or genX_ where X is the gen number. - * - * You can declare a function to be used on some range of gens like this: - * - * GENX_FUNC(GEN7, GEN75) void - * genX(my_function_name)(args...) - * { - * // Do stuff - * } - * - * If the file is compiled for any set of gens containing gen7 and gen75, - * the function will effectively only get compiled twice as - * gen7_my_function_nmae and gen75_my_function_name. The function has to - * be compilable on all gens, but it will become a static inline that gets - * discarded by the compiler on all gens not in range. - * - * You can do pseudo-runtime checks in your function such as - * - * if (ANV_GEN > 8 || ANV_IS_HASWELL) { - * // Do something - * } - * - * The contents of the if statement must be valid regardless of gen, but - * the if will get compiled away on everything except haswell. - * - * For places where you really do have a compile-time conflict, you can - * use preprocessor logic: - * - * #if (ANV_GEN > 8 || ANV_IS_HASWELL) - * // Do something - * #endif - * - * However, it is strongly recommended that the former be used whenever - * possible. - */ - -/* Base macro defined on the command line. If we don't have this, we can't - * do anything. - */ -#ifdef ANV_GENx10 - -/* Gen checking macros */ -#define ANV_GEN ((ANV_GENx10) / 10) -#define ANV_IS_HASWELL ((ANV_GENx10) == 75) - -/* Prefixing macros */ -#if (ANV_GENx10 == 70) -# define GENX(X) GEN7_##X -# define genX(x) gen7_##x -#elif (ANV_GENx10 == 75) -# define GENX(X) GEN75_##X -# define genX(x) gen75_##x -#elif (ANV_GENx10 == 80) -# define GENX(X) GEN8_##X -# define genX(x) gen8_##x -#elif (ANV_GENx10 == 90) -# define GENX(X) GEN9_##X -# define genX(x) gen9_##x -#else -# error "Need to add prefixing macros for your gen" -#endif - -/* Macros for comparing gens */ -#if (ANV_GENx10 >= 70) -#define __ANV_GEN_GE_GEN7(T, F) T -#else -#define __ANV_GEN_GE_GEN7(T, F) F -#endif - -#if (ANV_GENx10 <= 70) -#define __ANV_GEN_LE_GEN7(T, F) T -#else -#define __ANV_GEN_LE_GEN7(T, F) F -#endif - -#if (ANV_GENx10 >= 75) -#define __ANV_GEN_GE_GEN75(T, F) T -#else -#define __ANV_GEN_GE_GEN75(T, F) F -#endif - -#if (ANV_GENx10 <= 75) -#define __ANV_GEN_LE_GEN75(T, F) T -#else -#define __ANV_GEN_LE_GEN75(T, F) F -#endif - -#if (ANV_GENx10 >= 80) -#define __ANV_GEN_GE_GEN8(T, F) T -#else -#define __ANV_GEN_GE_GEN8(T, F) F -#endif - -#if (ANV_GENx10 <= 80) -#define __ANV_GEN_LE_GEN8(T, F) T -#else -#define __ANV_GEN_LE_GEN8(T, F) F -#endif - -#if (ANV_GENx10 >= 90) -#define __ANV_GEN_GE_GEN9(T, F) T -#else -#define __ANV_GEN_GE_GEN9(T, F) F -#endif - -#if (ANV_GENx10 <= 90) -#define __ANV_GEN_LE_GEN9(T, F) T -#else -#define __ANV_GEN_LE_GEN9(T, F) F -#endif - -#define __ANV_GEN_IN_RANGE(start, end, T, F) \ - __ANV_GEN_GE_##start(__ANV_GEN_LE_##end(T, F), F) - -/* Declares a function as static inlind if it's not in range */ -#define GENX_FUNC(start, end) __ANV_GEN_IN_RANGE(start, end, , static inline) - -#endif /* ANV_GENx10 */ diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c deleted file mode 100644 index 0a412a3..0000000 --- a/src/vulkan/anv_image.c +++ 
/dev/null @@ -1,911 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -/** - * Exactly one bit must be set in \a aspect. - */ -static isl_surf_usage_flags_t -choose_isl_surf_usage(VkImageUsageFlags vk_usage, - VkImageAspectFlags aspect) -{ - isl_surf_usage_flags_t isl_usage = 0; - - /* FINISHME: Support aux surfaces */ - isl_usage |= ISL_SURF_USAGE_DISABLE_AUX_BIT; - - if (vk_usage & VK_IMAGE_USAGE_SAMPLED_BIT) - isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT; - - if (vk_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) - isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT; - - if (vk_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) - isl_usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT; - - if (vk_usage & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) - isl_usage |= ISL_SURF_USAGE_CUBE_BIT; - - if (vk_usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { - switch (aspect) { - default: - unreachable("bad VkImageAspect"); - case VK_IMAGE_ASPECT_DEPTH_BIT: - isl_usage |= ISL_SURF_USAGE_DEPTH_BIT; - break; - case VK_IMAGE_ASPECT_STENCIL_BIT: - isl_usage |= ISL_SURF_USAGE_STENCIL_BIT; - break; - } - } - - if (vk_usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { - /* Meta implements transfers by sampling from the source image. */ - isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT; - } - - if (vk_usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { - /* Meta implements transfers by rendering into the destination image. */ - isl_usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT; - } - - return isl_usage; -} - -/** - * Exactly one bit must be set in \a aspect. - */ -static struct anv_surface * -get_surface(struct anv_image *image, VkImageAspectFlags aspect) -{ - switch (aspect) { - default: - unreachable("bad VkImageAspect"); - case VK_IMAGE_ASPECT_COLOR_BIT: - return &image->color_surface; - case VK_IMAGE_ASPECT_DEPTH_BIT: - return &image->depth_surface; - case VK_IMAGE_ASPECT_STENCIL_BIT: - return &image->stencil_surface; - } -} - -/** - * Initialize the anv_image::*_surface selected by \a aspect. Then update the - * image's memory requirements (that is, the image's size and alignment). - * - * Exactly one bit must be set in \a aspect. 
- */ -static VkResult -make_surface(const struct anv_device *dev, - struct anv_image *image, - const struct anv_image_create_info *anv_info, - VkImageAspectFlags aspect) -{ - const VkImageCreateInfo *vk_info = anv_info->vk_info; - bool ok UNUSED; - - static const enum isl_surf_dim vk_to_isl_surf_dim[] = { - [VK_IMAGE_TYPE_1D] = ISL_SURF_DIM_1D, - [VK_IMAGE_TYPE_2D] = ISL_SURF_DIM_2D, - [VK_IMAGE_TYPE_3D] = ISL_SURF_DIM_3D, - }; - - isl_tiling_flags_t tiling_flags = anv_info->isl_tiling_flags; - if (vk_info->tiling == VK_IMAGE_TILING_LINEAR) - tiling_flags &= ISL_TILING_LINEAR_BIT; - - struct anv_surface *anv_surf = get_surface(image, aspect); - - VkExtent3D extent; - switch (vk_info->imageType) { - case VK_IMAGE_TYPE_1D: - extent = (VkExtent3D) { vk_info->extent.width, 1, 1 }; - break; - case VK_IMAGE_TYPE_2D: - extent = (VkExtent3D) { vk_info->extent.width, vk_info->extent.height, 1 }; - break; - case VK_IMAGE_TYPE_3D: - extent = vk_info->extent; - break; - default: - unreachable("invalid image type"); - } - - image->extent = extent; - - ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl, - .dim = vk_to_isl_surf_dim[vk_info->imageType], - .format = anv_get_isl_format(vk_info->format, aspect, - vk_info->tiling, NULL), - .width = extent.width, - .height = extent.height, - .depth = extent.depth, - .levels = vk_info->mipLevels, - .array_len = vk_info->arrayLayers, - .samples = vk_info->samples, - .min_alignment = 0, - .min_pitch = 0, - .usage = choose_isl_surf_usage(image->usage, aspect), - .tiling_flags = tiling_flags); - - /* isl_surf_init() will fail only if provided invalid input. Invalid input - * is illegal in Vulkan. - */ - assert(ok); - - anv_surf->offset = align_u32(image->size, anv_surf->isl.alignment); - image->size = anv_surf->offset + anv_surf->isl.size; - image->alignment = MAX(image->alignment, anv_surf->isl.alignment); - - return VK_SUCCESS; -} - -/** - * Parameter @a format is required and overrides VkImageCreateInfo::format. - */ -static VkImageUsageFlags -anv_image_get_full_usage(const VkImageCreateInfo *info, - const struct anv_format *format) -{ - VkImageUsageFlags usage = info->usage; - - if (info->samples > 1 && - (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) { - /* Meta will resolve the image by binding it as a texture. */ - usage |= VK_IMAGE_USAGE_SAMPLED_BIT; - } - - if (usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { - /* Meta will transfer from the image by binding it as a texture. */ - usage |= VK_IMAGE_USAGE_SAMPLED_BIT; - } - - if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { - /* For non-clear transfer operations, meta will transfer to the image by - * binding it as a color attachment, even if the image format is not - * a color format. - */ - usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - - if (anv_format_is_depth_or_stencil(format)) { - /* vkCmdClearDepthStencilImage() only requires that - * VK_IMAGE_USAGE_TRANSFER_SRC_BIT be set. In particular, it does - * not require VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT. Meta - * clears the image, though, by binding it as a depthstencil - * attachment. 
- */ - usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; - } - } - - return usage; -} - -VkResult -anv_image_create(VkDevice _device, - const struct anv_image_create_info *create_info, - const VkAllocationCallbacks* alloc, - VkImage *pImage) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - const VkImageCreateInfo *pCreateInfo = create_info->vk_info; - struct anv_image *image = NULL; - const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format); - VkResult r; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); - - anv_assert(pCreateInfo->mipLevels > 0); - anv_assert(pCreateInfo->arrayLayers > 0); - anv_assert(pCreateInfo->samples > 0); - anv_assert(pCreateInfo->extent.width > 0); - anv_assert(pCreateInfo->extent.height > 0); - anv_assert(pCreateInfo->extent.depth > 0); - - image = anv_alloc2(&device->alloc, alloc, sizeof(*image), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!image) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - memset(image, 0, sizeof(*image)); - image->type = pCreateInfo->imageType; - image->extent = pCreateInfo->extent; - image->vk_format = pCreateInfo->format; - image->format = format; - image->levels = pCreateInfo->mipLevels; - image->array_size = pCreateInfo->arrayLayers; - image->samples = pCreateInfo->samples; - image->usage = anv_image_get_full_usage(pCreateInfo, format); - image->tiling = pCreateInfo->tiling; - - if (likely(anv_format_is_color(format))) { - r = make_surface(device, image, create_info, - VK_IMAGE_ASPECT_COLOR_BIT); - if (r != VK_SUCCESS) - goto fail; - } else { - if (image->format->has_depth) { - r = make_surface(device, image, create_info, - VK_IMAGE_ASPECT_DEPTH_BIT); - if (r != VK_SUCCESS) - goto fail; - } - - if (image->format->has_stencil) { - r = make_surface(device, image, create_info, - VK_IMAGE_ASPECT_STENCIL_BIT); - if (r != VK_SUCCESS) - goto fail; - } - } - - *pImage = anv_image_to_handle(image); - - return VK_SUCCESS; - -fail: - if (image) - anv_free2(&device->alloc, alloc, image); - - return r; -} - -VkResult -anv_CreateImage(VkDevice device, - const VkImageCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkImage *pImage) -{ - return anv_image_create(device, - &(struct anv_image_create_info) { - .vk_info = pCreateInfo, - .isl_tiling_flags = ISL_TILING_ANY_MASK, - }, - pAllocator, - pImage); -} - -void -anv_DestroyImage(VkDevice _device, VkImage _image, - const VkAllocationCallbacks *pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - - anv_free2(&device->alloc, pAllocator, anv_image_from_handle(_image)); -} - -static void -anv_surface_get_subresource_layout(struct anv_image *image, - struct anv_surface *surface, - const VkImageSubresource *subresource, - VkSubresourceLayout *layout) -{ - /* If we are on a non-zero mip level or array slice, we need to - * calculate a real offset. 
- */ - anv_assert(subresource->mipLevel == 0); - anv_assert(subresource->arrayLayer == 0); - - layout->offset = surface->offset; - layout->rowPitch = surface->isl.row_pitch; - layout->depthPitch = isl_surf_get_array_pitch(&surface->isl); - layout->arrayPitch = isl_surf_get_array_pitch(&surface->isl); - layout->size = surface->isl.size; -} - -void anv_GetImageSubresourceLayout( - VkDevice device, - VkImage _image, - const VkImageSubresource* pSubresource, - VkSubresourceLayout* pLayout) -{ - ANV_FROM_HANDLE(anv_image, image, _image); - - assert(__builtin_popcount(pSubresource->aspectMask) == 1); - - switch (pSubresource->aspectMask) { - case VK_IMAGE_ASPECT_COLOR_BIT: - anv_surface_get_subresource_layout(image, &image->color_surface, - pSubresource, pLayout); - break; - case VK_IMAGE_ASPECT_DEPTH_BIT: - anv_surface_get_subresource_layout(image, &image->depth_surface, - pSubresource, pLayout); - break; - case VK_IMAGE_ASPECT_STENCIL_BIT: - anv_surface_get_subresource_layout(image, &image->stencil_surface, - pSubresource, pLayout); - break; - default: - assert(!"Invalid image aspect"); - } -} - -VkResult -anv_validate_CreateImageView(VkDevice _device, - const VkImageViewCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkImageView *pView) -{ - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - const VkImageSubresourceRange *subresource; - const struct anv_format *view_format_info; - - /* Validate structure type before dereferencing it. */ - assert(pCreateInfo); - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO); - subresource = &pCreateInfo->subresourceRange; - - /* Validate viewType is in range before using it. */ - assert(pCreateInfo->viewType >= VK_IMAGE_VIEW_TYPE_BEGIN_RANGE); - assert(pCreateInfo->viewType <= VK_IMAGE_VIEW_TYPE_END_RANGE); - - /* Validate format is in range before using it. */ - assert(pCreateInfo->format >= VK_FORMAT_BEGIN_RANGE); - assert(pCreateInfo->format <= VK_FORMAT_END_RANGE); - view_format_info = anv_format_for_vk_format(pCreateInfo->format); - - /* Validate channel swizzles. */ - assert(pCreateInfo->components.r >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); - assert(pCreateInfo->components.r <= VK_COMPONENT_SWIZZLE_END_RANGE); - assert(pCreateInfo->components.g >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); - assert(pCreateInfo->components.g <= VK_COMPONENT_SWIZZLE_END_RANGE); - assert(pCreateInfo->components.b >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); - assert(pCreateInfo->components.b <= VK_COMPONENT_SWIZZLE_END_RANGE); - assert(pCreateInfo->components.a >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE); - assert(pCreateInfo->components.a <= VK_COMPONENT_SWIZZLE_END_RANGE); - - /* Validate subresource. */ - assert(subresource->aspectMask != 0); - assert(subresource->levelCount > 0); - assert(subresource->layerCount > 0); - assert(subresource->baseMipLevel < image->levels); - assert(subresource->baseMipLevel + subresource->levelCount <= image->levels); - assert(subresource->baseArrayLayer < image->array_size); - assert(subresource->baseArrayLayer + subresource->layerCount <= image->array_size); - assert(pView); - - const VkImageAspectFlags ds_flags = VK_IMAGE_ASPECT_DEPTH_BIT - | VK_IMAGE_ASPECT_STENCIL_BIT; - - /* Validate format. 
*/ - if (subresource->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { - assert(subresource->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); - assert(!image->format->has_depth); - assert(!image->format->has_stencil); - assert(!view_format_info->has_depth); - assert(!view_format_info->has_stencil); - assert(view_format_info->isl_layout->bs == - image->format->isl_layout->bs); - } else if (subresource->aspectMask & ds_flags) { - assert((subresource->aspectMask & ~ds_flags) == 0); - - if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { - assert(image->format->has_depth); - assert(view_format_info->has_depth); - assert(view_format_info->isl_layout->bs == - image->format->isl_layout->bs); - } - - if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { - /* FINISHME: Is it legal to have an R8 view of S8? */ - assert(image->format->has_stencil); - assert(view_format_info->has_stencil); - } - } else { - assert(!"bad VkImageSubresourceRange::aspectFlags"); - } - - return anv_CreateImageView(_device, pCreateInfo, pAllocator, pView); -} - -void -anv_fill_image_surface_state(struct anv_device *device, struct anv_state state, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage) -{ - switch (device->info.gen) { - case 7: - if (device->info.is_haswell) - gen75_fill_image_surface_state(device, state.map, iview, - pCreateInfo, usage); - else - gen7_fill_image_surface_state(device, state.map, iview, - pCreateInfo, usage); - break; - case 8: - gen8_fill_image_surface_state(device, state.map, iview, - pCreateInfo, usage); - break; - case 9: - gen9_fill_image_surface_state(device, state.map, iview, - pCreateInfo, usage); - break; - default: - unreachable("unsupported gen\n"); - } - - if (!device->info.has_llc) - anv_state_clflush(state); -} - -static struct anv_state -alloc_surface_state(struct anv_device *device, - struct anv_cmd_buffer *cmd_buffer) -{ - if (cmd_buffer) { - return anv_cmd_buffer_alloc_surface_state(cmd_buffer); - } else { - return anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - } -} - -static bool -has_matching_storage_typed_format(const struct anv_device *device, - enum isl_format format) -{ - return (isl_format_get_layout(format)->bs <= 4 || - (isl_format_get_layout(format)->bs <= 8 && - (device->info.gen >= 8 || device->info.is_haswell)) || - device->info.gen >= 9); -} - -static VkComponentSwizzle -remap_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component, - struct anv_format_swizzle format_swizzle) -{ - if (swizzle == VK_COMPONENT_SWIZZLE_IDENTITY) - swizzle = component; - - switch (swizzle) { - case VK_COMPONENT_SWIZZLE_ZERO: - return VK_COMPONENT_SWIZZLE_ZERO; - case VK_COMPONENT_SWIZZLE_ONE: - return VK_COMPONENT_SWIZZLE_ONE; - case VK_COMPONENT_SWIZZLE_R: - return VK_COMPONENT_SWIZZLE_R + format_swizzle.r; - case VK_COMPONENT_SWIZZLE_G: - return VK_COMPONENT_SWIZZLE_R + format_swizzle.g; - case VK_COMPONENT_SWIZZLE_B: - return VK_COMPONENT_SWIZZLE_R + format_swizzle.b; - case VK_COMPONENT_SWIZZLE_A: - return VK_COMPONENT_SWIZZLE_R + format_swizzle.a; - default: - unreachable("Invalid swizzle"); - } -} - -void -anv_image_view_init(struct anv_image_view *iview, - struct anv_device *device, - const VkImageViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer, - uint32_t offset) -{ - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - VkImageViewCreateInfo mCreateInfo; - memcpy(&mCreateInfo, pCreateInfo, 
sizeof(VkImageViewCreateInfo)); - - assert(range->layerCount > 0); - assert(range->baseMipLevel < image->levels); - assert(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | - VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)); - - switch (image->type) { - default: - unreachable("bad VkImageType"); - case VK_IMAGE_TYPE_1D: - case VK_IMAGE_TYPE_2D: - assert(range->baseArrayLayer + range->layerCount - 1 <= image->array_size); - break; - case VK_IMAGE_TYPE_3D: - assert(range->baseArrayLayer + range->layerCount - 1 - <= anv_minify(image->extent.depth, range->baseMipLevel)); - break; - } - - struct anv_surface *surface = - anv_image_get_surface_for_aspect_mask(image, range->aspectMask); - - iview->image = image; - iview->bo = image->bo; - iview->offset = image->offset + surface->offset + offset; - - iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask; - iview->vk_format = pCreateInfo->format; - - struct anv_format_swizzle swizzle; - iview->format = anv_get_isl_format(pCreateInfo->format, iview->aspect_mask, - image->tiling, &swizzle); - iview->swizzle.r = remap_swizzle(pCreateInfo->components.r, - VK_COMPONENT_SWIZZLE_R, swizzle); - iview->swizzle.g = remap_swizzle(pCreateInfo->components.g, - VK_COMPONENT_SWIZZLE_G, swizzle); - iview->swizzle.b = remap_swizzle(pCreateInfo->components.b, - VK_COMPONENT_SWIZZLE_B, swizzle); - iview->swizzle.a = remap_swizzle(pCreateInfo->components.a, - VK_COMPONENT_SWIZZLE_A, swizzle); - - iview->base_layer = range->baseArrayLayer; - iview->base_mip = range->baseMipLevel; - - if (!isl_format_is_compressed(iview->format) && - isl_format_is_compressed(image->format->isl_format)) { - /* Scale the ImageView extent by the backing Image. This is used - * internally when an uncompressed ImageView is created on a - * compressed Image. The ImageView can therefore be used for copying - * data from a source Image to a destination Image. 
- */ - const struct isl_format_layout * isl_layout = image->format->isl_layout; - - iview->level_0_extent.depth = anv_minify(image->extent.depth, range->baseMipLevel); - iview->level_0_extent.depth = DIV_ROUND_UP(iview->level_0_extent.depth, isl_layout->bd); - - iview->level_0_extent.height = isl_surf_get_array_pitch_el_rows(&surface->isl) * image->array_size; - iview->level_0_extent.width = isl_surf_get_row_pitch_el(&surface->isl); - mCreateInfo.subresourceRange.baseMipLevel = 0; - mCreateInfo.subresourceRange.baseArrayLayer = 0; - } else { - iview->level_0_extent.width = image->extent.width; - iview->level_0_extent.height = image->extent.height; - iview->level_0_extent.depth = image->extent.depth; - } - - iview->extent = (VkExtent3D) { - .width = anv_minify(iview->level_0_extent.width , range->baseMipLevel), - .height = anv_minify(iview->level_0_extent.height, range->baseMipLevel), - .depth = anv_minify(iview->level_0_extent.depth , range->baseMipLevel), - }; - - if (image->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { - iview->sampler_surface_state = alloc_surface_state(device, cmd_buffer); - - anv_fill_image_surface_state(device, iview->sampler_surface_state, - iview, &mCreateInfo, - VK_IMAGE_USAGE_SAMPLED_BIT); - } else { - iview->sampler_surface_state.alloc_size = 0; - } - - if (image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { - iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer); - - anv_fill_image_surface_state(device, iview->color_rt_surface_state, - iview, &mCreateInfo, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - } else { - iview->color_rt_surface_state.alloc_size = 0; - } - - if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) { - iview->storage_surface_state = alloc_surface_state(device, cmd_buffer); - - if (has_matching_storage_typed_format(device, iview->format)) - anv_fill_image_surface_state(device, iview->storage_surface_state, - iview, &mCreateInfo, - VK_IMAGE_USAGE_STORAGE_BIT); - else - anv_fill_buffer_surface_state(device, iview->storage_surface_state, - ISL_FORMAT_RAW, - iview->offset, - iview->bo->size - iview->offset, 1); - - } else { - iview->storage_surface_state.alloc_size = 0; - } -} - -VkResult -anv_CreateImageView(VkDevice _device, - const VkImageViewCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkImageView *pView) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_image_view *view; - - view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (view == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - anv_image_view_init(view, device, pCreateInfo, NULL, 0); - - *pView = anv_image_view_to_handle(view); - - return VK_SUCCESS; -} - -void -anv_DestroyImageView(VkDevice _device, VkImageView _iview, - const VkAllocationCallbacks *pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_image_view, iview, _iview); - - if (iview->color_rt_surface_state.alloc_size > 0) { - anv_state_pool_free(&device->surface_state_pool, - iview->color_rt_surface_state); - } - - if (iview->sampler_surface_state.alloc_size > 0) { - anv_state_pool_free(&device->surface_state_pool, - iview->sampler_surface_state); - } - - if (iview->storage_surface_state.alloc_size > 0) { - anv_state_pool_free(&device->surface_state_pool, - iview->storage_surface_state); - } - - anv_free2(&device->alloc, pAllocator, iview); -} - -VkResult -anv_CreateBufferView(VkDevice _device, - const VkBufferViewCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - 
VkBufferView *pView) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer); - struct anv_buffer_view *view; - - view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!view) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - const struct anv_format *format = - anv_format_for_vk_format(pCreateInfo->format); - - view->format = format->isl_format; - view->bo = buffer->bo; - view->offset = buffer->offset + pCreateInfo->offset; - view->range = pCreateInfo->range == VK_WHOLE_SIZE ? - buffer->size - view->offset : pCreateInfo->range; - - if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT) { - view->surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - - anv_fill_buffer_surface_state(device, view->surface_state, - view->format, - view->offset, view->range, - format->isl_layout->bs); - } else { - view->surface_state = (struct anv_state){ 0 }; - } - - if (buffer->usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) { - view->storage_surface_state = - anv_state_pool_alloc(&device->surface_state_pool, 64, 64); - - enum isl_format storage_format = - has_matching_storage_typed_format(device, view->format) ? - isl_lower_storage_image_format(&device->isl_dev, view->format) : - ISL_FORMAT_RAW; - - anv_fill_buffer_surface_state(device, view->storage_surface_state, - storage_format, - view->offset, view->range, - (storage_format == ISL_FORMAT_RAW ? 1 : - format->isl_layout->bs)); - - } else { - view->storage_surface_state = (struct anv_state){ 0 }; - } - - *pView = anv_buffer_view_to_handle(view); - - return VK_SUCCESS; -} - -void -anv_DestroyBufferView(VkDevice _device, VkBufferView bufferView, - const VkAllocationCallbacks *pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_buffer_view, view, bufferView); - - if (view->surface_state.alloc_size > 0) - anv_state_pool_free(&device->surface_state_pool, - view->surface_state); - - if (view->storage_surface_state.alloc_size > 0) - anv_state_pool_free(&device->surface_state_pool, - view->storage_surface_state); - - anv_free2(&device->alloc, pAllocator, view); -} - -struct anv_surface * -anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlags aspect_mask) -{ - switch (aspect_mask) { - case VK_IMAGE_ASPECT_COLOR_BIT: - /* Dragons will eat you. - * - * Meta attaches all destination surfaces as color render targets. Guess - * what surface the Meta Dragons really want. - */ - if (image->format->has_depth && image->format->has_stencil) { - return &image->depth_surface; - } else if (image->format->has_depth) { - return &image->depth_surface; - } else if (image->format->has_stencil) { - return &image->stencil_surface; - } else { - return &image->color_surface; - } - break; - case VK_IMAGE_ASPECT_DEPTH_BIT: - assert(image->format->has_depth); - return &image->depth_surface; - case VK_IMAGE_ASPECT_STENCIL_BIT: - assert(image->format->has_stencil); - return &image->stencil_surface; - case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: - if (image->format->has_depth && image->format->has_stencil) { - /* FINISHME: The Vulkan spec (git a511ba2) requires support for - * combined depth stencil formats. Specifically, it states: - * - * At least one of ename:VK_FORMAT_D24_UNORM_S8_UINT or - * ename:VK_FORMAT_D32_SFLOAT_S8_UINT must be supported. 
- * - * Image views with both depth and stencil aspects are only valid for - * render target attachments, in which case - * cmd_buffer_emit_depth_stencil() will pick out both the depth and - * stencil surfaces from the underlying surface. - */ - return &image->depth_surface; - } else if (image->format->has_depth) { - return &image->depth_surface; - } else if (image->format->has_stencil) { - return &image->stencil_surface; - } - /* fallthrough */ - default: - unreachable("image does not have aspect"); - return NULL; - } -} - -static void -image_param_defaults(struct brw_image_param *param) -{ - memset(param, 0, sizeof *param); - /* Set the swizzling shifts to all-ones to effectively disable swizzling -- - * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more - * detailed explanation of these parameters. - */ - param->swizzling[0] = 0xff; - param->swizzling[1] = 0xff; -} - -void -anv_image_view_fill_image_param(struct anv_device *device, - struct anv_image_view *view, - struct brw_image_param *param) -{ - image_param_defaults(param); - - const struct isl_surf *surf = &view->image->color_surface.isl; - const int cpp = isl_format_get_layout(surf->format)->bs; - const struct isl_extent3d image_align_sa = - isl_surf_get_image_alignment_sa(surf); - - param->size[0] = view->extent.width; - param->size[1] = view->extent.height; - if (surf->dim == ISL_SURF_DIM_3D) { - param->size[2] = view->extent.depth; - } else { - param->size[2] = surf->logical_level0_px.array_len - view->base_layer; - } - - isl_surf_get_image_offset_el(surf, view->base_mip, view->base_layer, 0, - ¶m->offset[0], ¶m->offset[1]); - - param->stride[0] = cpp; - param->stride[1] = surf->row_pitch / cpp; - - if (device->info.gen < 9 && surf->dim == ISL_SURF_DIM_3D) { - param->stride[2] = util_align_npot(param->size[0], image_align_sa.w); - param->stride[3] = util_align_npot(param->size[1], image_align_sa.h); - } else { - param->stride[2] = 0; - param->stride[3] = isl_surf_get_array_pitch_el_rows(surf); - } - - switch (surf->tiling) { - case ISL_TILING_LINEAR: - /* image_param_defaults is good enough */ - break; - - case ISL_TILING_X: - /* An X tile is a rectangular block of 512x8 bytes. */ - param->tiling[0] = util_logbase2(512 / cpp); - param->tiling[1] = util_logbase2(8); - - if (device->isl_dev.has_bit6_swizzling) { - /* Right shifts required to swizzle bits 9 and 10 of the memory - * address with bit 6. - */ - param->swizzling[0] = 3; - param->swizzling[1] = 4; - } - break; - - case ISL_TILING_Y0: - /* The layout of a Y-tiled surface in memory isn't really fundamentally - * different to the layout of an X-tiled surface, we simply pretend that - * the surface is broken up in a number of smaller 16Bx32 tiles, each - * one arranged in X-major order just like is the case for X-tiling. - */ - param->tiling[0] = util_logbase2(16 / cpp); - param->tiling[1] = util_logbase2(32); - - if (device->isl_dev.has_bit6_swizzling) { - /* Right shift required to swizzle bit 9 of the memory address with - * bit 6. - */ - param->swizzling[0] = 3; - param->swizzling[1] = 0xff; - } - break; - - default: - assert(!"Unhandled storage image tiling"); - } - - /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The - * address calculation algorithm (emit_address_calculation() in - * brw_fs_surface_builder.cpp) handles this as a sort of tiling with - * modulus equal to the LOD. - */ - param->tiling[2] = (device->info.gen < 9 && surf->dim == ISL_SURF_DIM_3D ? 
- view->base_mip : 0); -} - -void -anv_buffer_view_fill_image_param(struct anv_device *device, - struct anv_buffer_view *view, - struct brw_image_param *param) -{ - image_param_defaults(param); - - param->stride[0] = isl_format_layouts[view->format].bs; - param->size[0] = view->range / param->stride[0]; -} diff --git a/src/vulkan/anv_intel.c b/src/vulkan/anv_intel.c deleted file mode 100644 index d95d9af..0000000 --- a/src/vulkan/anv_intel.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -VkResult anv_CreateDmaBufImageINTEL( - VkDevice _device, - const VkDmaBufImageCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkDeviceMemory* pMem, - VkImage* pImage) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_device_memory *mem; - struct anv_image *image; - VkResult result; - VkImage image_h; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL); - - mem = anv_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (mem == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - mem->bo.gem_handle = anv_gem_fd_to_handle(device, pCreateInfo->fd); - if (!mem->bo.gem_handle) { - result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); - goto fail; - } - - mem->bo.map = NULL; - mem->bo.index = 0; - mem->bo.offset = 0; - mem->bo.size = pCreateInfo->strideInBytes * pCreateInfo->extent.height; - - anv_image_create(_device, - &(struct anv_image_create_info) { - .isl_tiling_flags = ISL_TILING_X_BIT, - .stride = pCreateInfo->strideInBytes, - .vk_info = - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = pCreateInfo->format, - .extent = pCreateInfo->extent, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - /* FIXME: Need a way to use X tiling to allow scanout */ - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - .flags = 0, - }}, - pAllocator, &image_h); - - image = anv_image_from_handle(image_h); - image->bo = &mem->bo; - image->offset = 0; - - assert(image->extent.width > 0); - assert(image->extent.height > 0); - assert(image->extent.depth == 1); - - *pMem = anv_device_memory_to_handle(mem); - *pImage = anv_image_to_handle(image); - - return VK_SUCCESS; - - fail: - 
anv_free2(&device->alloc, pAllocator, mem); - - return result; -} diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c deleted file mode 100644 index 82944ea..0000000 --- a/src/vulkan/anv_meta.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "anv_meta.h" - -struct anv_render_pass anv_meta_dummy_renderpass = {0}; - -void -anv_meta_save(struct anv_meta_saved_state *state, - const struct anv_cmd_buffer *cmd_buffer, - uint32_t dynamic_mask) -{ - state->old_pipeline = cmd_buffer->state.pipeline; - state->old_descriptor_set0 = cmd_buffer->state.descriptors[0]; - memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings, - sizeof(state->old_vertex_bindings)); - - state->dynamic_mask = dynamic_mask; - anv_dynamic_state_copy(&state->dynamic, &cmd_buffer->state.dynamic, - dynamic_mask); -} - -void -anv_meta_restore(const struct anv_meta_saved_state *state, - struct anv_cmd_buffer *cmd_buffer) -{ - cmd_buffer->state.pipeline = state->old_pipeline; - cmd_buffer->state.descriptors[0] = state->old_descriptor_set0; - memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings, - sizeof(state->old_vertex_bindings)); - - cmd_buffer->state.vb_dirty |= (1 << ANV_META_VERTEX_BINDING_COUNT) - 1; - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE; - cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; - - anv_dynamic_state_copy(&cmd_buffer->state.dynamic, &state->dynamic, - state->dynamic_mask); - cmd_buffer->state.dirty |= state->dynamic_mask; - - /* Since we've used the pipeline with the VS disabled, set - * need_query_wa. See CmdBeginQuery. - */ - cmd_buffer->state.need_query_wa = true; -} - -VkImageViewType -anv_meta_get_view_type(const struct anv_image *image) -{ - switch (image->type) { - case VK_IMAGE_TYPE_1D: return VK_IMAGE_VIEW_TYPE_1D; - case VK_IMAGE_TYPE_2D: return VK_IMAGE_VIEW_TYPE_2D; - case VK_IMAGE_TYPE_3D: return VK_IMAGE_VIEW_TYPE_3D; - default: - unreachable("bad VkImageViewType"); - } -} - -/** - * When creating a destination VkImageView, this function provides the needed - * VkImageViewCreateInfo::subresourceRange::baseArrayLayer. 
- */ -uint32_t -anv_meta_get_iview_layer(const struct anv_image *dest_image, - const VkImageSubresourceLayers *dest_subresource, - const VkOffset3D *dest_offset) -{ - switch (dest_image->type) { - case VK_IMAGE_TYPE_1D: - case VK_IMAGE_TYPE_2D: - return dest_subresource->baseArrayLayer; - case VK_IMAGE_TYPE_3D: - /* HACK: Vulkan does not allow attaching a 3D image to a framebuffer, - * but meta does it anyway. When doing so, we translate the - * destination's z offset into an array offset. - */ - return dest_offset->z; - default: - assert(!"bad VkImageType"); - return 0; - } -} - -static void * -meta_alloc(void* _device, size_t size, size_t alignment, - VkSystemAllocationScope allocationScope) -{ - struct anv_device *device = _device; - return device->alloc.pfnAllocation(device->alloc.pUserData, size, alignment, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); -} - -static void * -meta_realloc(void* _device, void *original, size_t size, size_t alignment, - VkSystemAllocationScope allocationScope) -{ - struct anv_device *device = _device; - return device->alloc.pfnReallocation(device->alloc.pUserData, original, - size, alignment, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); -} - -static void -meta_free(void* _device, void *data) -{ - struct anv_device *device = _device; - return device->alloc.pfnFree(device->alloc.pUserData, data); -} - -VkResult -anv_device_init_meta(struct anv_device *device) -{ - VkResult result; - - device->meta_state.alloc = (VkAllocationCallbacks) { - .pUserData = device, - .pfnAllocation = meta_alloc, - .pfnReallocation = meta_realloc, - .pfnFree = meta_free, - }; - - result = anv_device_init_meta_clear_state(device); - if (result != VK_SUCCESS) - goto fail_clear; - - result = anv_device_init_meta_resolve_state(device); - if (result != VK_SUCCESS) - goto fail_resolve; - - result = anv_device_init_meta_blit_state(device); - if (result != VK_SUCCESS) - goto fail_blit; - - return VK_SUCCESS; - -fail_blit: - anv_device_finish_meta_resolve_state(device); -fail_resolve: - anv_device_finish_meta_clear_state(device); -fail_clear: - return result; -} - -void -anv_device_finish_meta(struct anv_device *device) -{ - anv_device_finish_meta_resolve_state(device); - anv_device_finish_meta_clear_state(device); - anv_device_finish_meta_blit_state(device); -} diff --git a/src/vulkan/anv_meta.h b/src/vulkan/anv_meta.h deleted file mode 100644 index d33e9e6..0000000 --- a/src/vulkan/anv_meta.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#pragma once - -#include "anv_private.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define ANV_META_VERTEX_BINDING_COUNT 2 - -struct anv_meta_saved_state { - struct anv_vertex_binding old_vertex_bindings[ANV_META_VERTEX_BINDING_COUNT]; - struct anv_descriptor_set *old_descriptor_set0; - struct anv_pipeline *old_pipeline; - - /** - * Bitmask of (1 << VK_DYNAMIC_STATE_*). Defines the set of saved dynamic - * state. - */ - uint32_t dynamic_mask; - struct anv_dynamic_state dynamic; -}; - -VkResult anv_device_init_meta_clear_state(struct anv_device *device); -void anv_device_finish_meta_clear_state(struct anv_device *device); - -VkResult anv_device_init_meta_resolve_state(struct anv_device *device); -void anv_device_finish_meta_resolve_state(struct anv_device *device); - -VkResult anv_device_init_meta_blit_state(struct anv_device *device); -void anv_device_finish_meta_blit_state(struct anv_device *device); - -void -anv_meta_save(struct anv_meta_saved_state *state, - const struct anv_cmd_buffer *cmd_buffer, - uint32_t dynamic_mask); - -void -anv_meta_restore(const struct anv_meta_saved_state *state, - struct anv_cmd_buffer *cmd_buffer); - -VkImageViewType -anv_meta_get_view_type(const struct anv_image *image); - -uint32_t -anv_meta_get_iview_layer(const struct anv_image *dest_image, - const VkImageSubresourceLayers *dest_subresource, - const VkOffset3D *dest_offset); - -#ifdef __cplusplus -} -#endif diff --git a/src/vulkan/anv_meta_blit.c b/src/vulkan/anv_meta_blit.c deleted file mode 100644 index 07ebcbc..0000000 --- a/src/vulkan/anv_meta_blit.c +++ /dev/null @@ -1,1442 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "anv_meta.h" -#include "nir/nir_builder.h" - -struct blit_region { - VkOffset3D src_offset; - VkExtent3D src_extent; - VkOffset3D dest_offset; - VkExtent3D dest_extent; -}; - -static nir_shader * -build_nir_vertex_shader(void) -{ - const struct glsl_type *vec4 = glsl_vec4_type(); - nir_builder b; - - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); - b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs"); - - nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, - vec4, "a_pos"); - pos_in->data.location = VERT_ATTRIB_GENERIC0; - nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, - vec4, "gl_Position"); - pos_out->data.location = VARYING_SLOT_POS; - nir_copy_var(&b, pos_out, pos_in); - - nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, - vec4, "a_tex_pos"); - tex_pos_in->data.location = VERT_ATTRIB_GENERIC1; - nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out, - vec4, "v_tex_pos"); - tex_pos_out->data.location = VARYING_SLOT_VAR0; - tex_pos_out->data.interpolation = INTERP_QUALIFIER_SMOOTH; - nir_copy_var(&b, tex_pos_out, tex_pos_in); - - return b.shader; -} - -static nir_shader * -build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim) -{ - const struct glsl_type *vec4 = glsl_vec4_type(); - nir_builder b; - - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); - b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs"); - - nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, - vec4, "v_tex_pos"); - tex_pos_in->data.location = VARYING_SLOT_VAR0; - - /* Swizzle the array index which comes in as Z coordinate into the right - * position. - */ - unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 }; - nir_ssa_def *const tex_pos = - nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, - (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false); - - const struct glsl_type *sampler_type = - glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, - glsl_get_base_type(vec4)); - nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, - sampler_type, "s_tex"); - sampler->data.descriptor_set = 0; - sampler->data.binding = 0; - - nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1); - tex->sampler_dim = tex_dim; - tex->op = nir_texop_tex; - tex->src[0].src_type = nir_tex_src_coord; - tex->src[0].src = nir_src_for_ssa(tex_pos); - tex->dest_type = nir_type_float; /* TODO */ - tex->is_array = glsl_sampler_type_is_array(sampler_type); - tex->coord_components = tex_pos->num_components; - tex->texture = nir_deref_var_create(tex, sampler); - tex->sampler = nir_deref_var_create(tex, sampler); - - nir_ssa_dest_init(&tex->instr, &tex->dest, 4, "tex"); - nir_builder_instr_insert(&b, &tex->instr); - - nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, - vec4, "f_color"); - color_out->data.location = FRAG_RESULT_DATA0; - nir_store_var(&b, color_out, &tex->dest.ssa, 4); - - return b.shader; -} - -static void -meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, - struct anv_meta_saved_state *saved_state) -{ - anv_meta_save(saved_state, cmd_buffer, - (1 << VK_DYNAMIC_STATE_VIEWPORT)); -} - -/* Returns the user-provided VkBufferImageCopy::imageOffset in units of - * elements rather than texels. One element equals one texel or one block - * if Image is uncompressed or compressed, respectively. 
- */ -static struct VkOffset3D -meta_region_offset_el(const struct anv_image * image, - const struct VkOffset3D * offset) -{ - const struct isl_format_layout * isl_layout = image->format->isl_layout; - return (VkOffset3D) { - .x = offset->x / isl_layout->bw, - .y = offset->y / isl_layout->bh, - .z = offset->z / isl_layout->bd, - }; -} - -/* Returns the user-provided VkBufferImageCopy::imageExtent in units of - * elements rather than texels. One element equals one texel or one block - * if Image is uncompressed or compressed, respectively. - */ -static struct VkExtent3D -meta_region_extent_el(const VkFormat format, - const struct VkExtent3D * extent) -{ - const struct isl_format_layout * isl_layout = - anv_format_for_vk_format(format)->isl_layout; - return (VkExtent3D) { - .width = DIV_ROUND_UP(extent->width , isl_layout->bw), - .height = DIV_ROUND_UP(extent->height, isl_layout->bh), - .depth = DIV_ROUND_UP(extent->depth , isl_layout->bd), - }; -} - -static void -meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, - struct anv_image *src_image, - struct anv_image_view *src_iview, - VkOffset3D src_offset, - VkExtent3D src_extent, - struct anv_image *dest_image, - struct anv_image_view *dest_iview, - VkOffset3D dest_offset, - VkExtent3D dest_extent, - VkFilter blit_filter) -{ - struct anv_device *device = cmd_buffer->device; - VkDescriptorPool dummy_desc_pool = (VkDescriptorPool)1; - - struct blit_vb_data { - float pos[2]; - float tex_coord[3]; - } *vb_data; - - assert(src_image->samples == dest_image->samples); - - unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data); - - struct anv_state vb_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16); - memset(vb_state.map, 0, sizeof(struct anv_vue_header)); - vb_data = vb_state.map + sizeof(struct anv_vue_header); - - vb_data[0] = (struct blit_vb_data) { - .pos = { - dest_offset.x + dest_extent.width, - dest_offset.y + dest_extent.height, - }, - .tex_coord = { - (float)(src_offset.x + src_extent.width) / (float)src_iview->extent.width, - (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, - (float)src_offset.z / (float)src_iview->extent.depth, - }, - }; - - vb_data[1] = (struct blit_vb_data) { - .pos = { - dest_offset.x, - dest_offset.y + dest_extent.height, - }, - .tex_coord = { - (float)src_offset.x / (float)src_iview->extent.width, - (float)(src_offset.y + src_extent.height) / (float)src_iview->extent.height, - (float)src_offset.z / (float)src_iview->extent.depth, - }, - }; - - vb_data[2] = (struct blit_vb_data) { - .pos = { - dest_offset.x, - dest_offset.y, - }, - .tex_coord = { - (float)src_offset.x / (float)src_iview->extent.width, - (float)src_offset.y / (float)src_iview->extent.height, - (float)src_offset.z / (float)src_iview->extent.depth, - }, - }; - - anv_state_clflush(vb_state); - - struct anv_buffer vertex_buffer = { - .device = device, - .size = vb_size, - .bo = &device->dynamic_state_block_pool.bo, - .offset = vb_state.offset, - }; - - anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, - (VkBuffer[]) { - anv_buffer_to_handle(&vertex_buffer), - anv_buffer_to_handle(&vertex_buffer) - }, - (VkDeviceSize[]) { - 0, - sizeof(struct anv_vue_header), - }); - - VkSampler sampler; - ANV_CALL(CreateSampler)(anv_device_to_handle(device), - &(VkSamplerCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .magFilter = blit_filter, - .minFilter = blit_filter, - }, &cmd_buffer->pool->alloc, &sampler); - - VkDescriptorSet set; - 
anv_AllocateDescriptorSets(anv_device_to_handle(device), - &(VkDescriptorSetAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = dummy_desc_pool, - .descriptorSetCount = 1, - .pSetLayouts = &device->meta_state.blit.ds_layout - }, &set); - anv_UpdateDescriptorSets(anv_device_to_handle(device), - 1, /* writeCount */ - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = set, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .pImageInfo = (VkDescriptorImageInfo[]) { - { - .sampler = sampler, - .imageView = anv_image_view_to_handle(src_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - } - } - }, 0, NULL); - - VkFramebuffer fb; - anv_CreateFramebuffer(anv_device_to_handle(device), - &(VkFramebufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkImageView[]) { - anv_image_view_to_handle(dest_iview), - }, - .width = dest_iview->extent.width, - .height = dest_iview->extent.height, - .layers = 1 - }, &cmd_buffer->pool->alloc, &fb); - - ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), - &(VkRenderPassBeginInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderPass = device->meta_state.blit.render_pass, - .framebuffer = fb, - .renderArea = { - .offset = { dest_offset.x, dest_offset.y }, - .extent = { dest_extent.width, dest_extent.height }, - }, - .clearValueCount = 0, - .pClearValues = NULL, - }, VK_SUBPASS_CONTENTS_INLINE); - - VkPipeline pipeline; - - switch (src_image->type) { - case VK_IMAGE_TYPE_1D: - pipeline = device->meta_state.blit.pipeline_1d_src; - break; - case VK_IMAGE_TYPE_2D: - pipeline = device->meta_state.blit.pipeline_2d_src; - break; - case VK_IMAGE_TYPE_3D: - pipeline = device->meta_state.blit.pipeline_3d_src; - break; - default: - unreachable(!"bad VkImageType"); - } - - if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) { - anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - } - - anv_CmdSetViewport(anv_cmd_buffer_to_handle(cmd_buffer), 0, 1, - &(VkViewport) { - .x = 0.0f, - .y = 0.0f, - .width = dest_iview->extent.width, - .height = dest_iview->extent.height, - .minDepth = 0.0f, - .maxDepth = 1.0f, - }); - - anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), - VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.blit.pipeline_layout, 0, 1, - &set, 0, NULL); - - ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0); - - ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); - - /* At the point where we emit the draw call, all data from the - * descriptor sets, etc. has been used. We are free to delete it. - */ - anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(set)); - anv_DestroySampler(anv_device_to_handle(device), sampler, - &cmd_buffer->pool->alloc); - anv_DestroyFramebuffer(anv_device_to_handle(device), fb, - &cmd_buffer->pool->alloc); -} - -static void -meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, - const struct anv_meta_saved_state *saved_state) -{ - anv_meta_restore(saved_state, cmd_buffer); -} - -static VkFormat -vk_format_for_size(int bs) -{ - /* Note: We intentionally use the 4-channel formats whenever we can. - * This is so that, when we do a RGB <-> RGBX copy, the two formats will - * line up even though one of them is 3/4 the size of the other. 
- */ - switch (bs) { - case 1: return VK_FORMAT_R8_UINT; - case 2: return VK_FORMAT_R8G8_UINT; - case 3: return VK_FORMAT_R8G8B8_UINT; - case 4: return VK_FORMAT_R8G8B8A8_UINT; - case 6: return VK_FORMAT_R16G16B16_UINT; - case 8: return VK_FORMAT_R16G16B16A16_UINT; - case 12: return VK_FORMAT_R32G32B32_UINT; - case 16: return VK_FORMAT_R32G32B32A32_UINT; - default: - unreachable("Invalid format block size"); - } -} - -static void -do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *src, uint64_t src_offset, - struct anv_bo *dest, uint64_t dest_offset, - int width, int height, VkFormat copy_format) -{ - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - - VkImageCreateInfo image_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = copy_format, - .extent = { - .width = width, - .height = height, - .depth = 1, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = 0, - .flags = 0, - }; - - VkImage src_image; - image_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT; - anv_CreateImage(vk_device, &image_info, - &cmd_buffer->pool->alloc, &src_image); - - VkImage dest_image; - image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - anv_CreateImage(vk_device, &image_info, - &cmd_buffer->pool->alloc, &dest_image); - - /* We could use a vk call to bind memory, but that would require - * creating a dummy memory object etc. so there's really no point. - */ - anv_image_from_handle(src_image)->bo = src; - anv_image_from_handle(src_image)->offset = src_offset; - anv_image_from_handle(dest_image)->bo = dest; - anv_image_from_handle(dest_image)->offset = dest_offset; - - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = src_image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = copy_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1 - }, - }, - cmd_buffer, 0); - - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = dest_image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = copy_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }, - cmd_buffer, 0); - - meta_emit_blit(cmd_buffer, - anv_image_from_handle(src_image), - &src_iview, - (VkOffset3D) { 0, 0, 0 }, - (VkExtent3D) { width, height, 1 }, - anv_image_from_handle(dest_image), - &dest_iview, - (VkOffset3D) { 0, 0, 0 }, - (VkExtent3D) { width, height, 1 }, - VK_FILTER_NEAREST); - - anv_DestroyImage(vk_device, src_image, &cmd_buffer->pool->alloc); - anv_DestroyImage(vk_device, dest_image, &cmd_buffer->pool->alloc); -} - -void anv_CmdCopyBuffer( - VkCommandBuffer commandBuffer, - VkBuffer srcBuffer, - VkBuffer destBuffer, - uint32_t regionCount, - const VkBufferCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); - ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer); - - struct anv_meta_saved_state saved_state; - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; - uint64_t 
dest_offset = dest_buffer->offset + pRegions[r].dstOffset; - uint64_t copy_size = pRegions[r].size; - - /* First, we compute the biggest format that can be used with the - * given offsets and size. - */ - int bs = 16; - - int fs = ffs(src_offset) - 1; - if (fs != -1) - bs = MIN2(bs, 1 << fs); - assert(src_offset % bs == 0); - - fs = ffs(dest_offset) - 1; - if (fs != -1) - bs = MIN2(bs, 1 << fs); - assert(dest_offset % bs == 0); - - fs = ffs(pRegions[r].size) - 1; - if (fs != -1) - bs = MIN2(bs, 1 << fs); - assert(pRegions[r].size % bs == 0); - - VkFormat copy_format = vk_format_for_size(bs); - - /* This is maximum possible width/height our HW can handle */ - uint64_t max_surface_dim = 1 << 14; - - /* First, we make a bunch of max-sized copies */ - uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs; - while (copy_size >= max_copy_size) { - do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, - dest_buffer->bo, dest_offset, - max_surface_dim, max_surface_dim, copy_format); - copy_size -= max_copy_size; - src_offset += max_copy_size; - dest_offset += max_copy_size; - } - - uint64_t height = copy_size / (max_surface_dim * bs); - assert(height < max_surface_dim); - if (height != 0) { - uint64_t rect_copy_size = height * max_surface_dim * bs; - do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, - dest_buffer->bo, dest_offset, - max_surface_dim, height, copy_format); - copy_size -= rect_copy_size; - src_offset += rect_copy_size; - dest_offset += rect_copy_size; - } - - if (copy_size != 0) { - do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset, - dest_buffer->bo, dest_offset, - copy_size / bs, 1, copy_format); - } - } - - meta_finish_blit(cmd_buffer, &saved_state); -} - -void anv_CmdUpdateBuffer( - VkCommandBuffer commandBuffer, - VkBuffer dstBuffer, - VkDeviceSize dstOffset, - VkDeviceSize dataSize, - const uint32_t* pData) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); - struct anv_meta_saved_state saved_state; - - meta_prepare_blit(cmd_buffer, &saved_state); - - /* We can't quite grab a full block because the state stream needs a - * little data at the top to build its linked list. - */ - const uint32_t max_update_size = - cmd_buffer->device->dynamic_state_block_pool.block_size - 64; - - assert(max_update_size < (1 << 14) * 4); - - while (dataSize) { - const uint32_t copy_size = MIN2(dataSize, max_update_size); - - struct anv_state tmp_data = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64); - - memcpy(tmp_data.map, pData, copy_size); - - VkFormat format; - int bs; - if ((copy_size & 15) == 0 && (dstOffset & 15) == 0) { - format = VK_FORMAT_R32G32B32A32_UINT; - bs = 16; - } else if ((copy_size & 7) == 0 && (dstOffset & 7) == 0) { - format = VK_FORMAT_R32G32_UINT; - bs = 8; - } else { - assert((copy_size & 3) == 0 && (dstOffset & 3) == 0); - format = VK_FORMAT_R32_UINT; - bs = 4; - } - - do_buffer_copy(cmd_buffer, - &cmd_buffer->device->dynamic_state_block_pool.bo, - tmp_data.offset, - dst_buffer->bo, dst_buffer->offset + dstOffset, - copy_size / bs, 1, format); - - dataSize -= copy_size; - dstOffset += copy_size; - pData = (void *)pData + copy_size; - } -} - -static VkFormat -choose_iview_format(struct anv_image *image, VkImageAspectFlagBits aspect) -{ - assert(__builtin_popcount(aspect) == 1); - - struct isl_surf *surf = - &anv_image_get_surface_for_aspect_mask(image, aspect)->isl; - - /* vkCmdCopyImage behaves like memcpy. 
Therefore we choose identical UINT - * formats for the source and destination image views. - * - * From the Vulkan spec (2015-12-30): - * - * vkCmdCopyImage performs image copies in a similar manner to a host - * memcpy. It does not perform general-purpose conversions such as - * scaling, resizing, blending, color-space conversion, or format - * conversions. Rather, it simply copies raw image data. vkCmdCopyImage - * can copy between images with different formats, provided the formats - * are compatible as defined below. - * - * [The spec later defines compatibility as having the same number of - * bytes per block]. - */ - return vk_format_for_size(isl_format_layouts[surf->format].bs); -} - -static VkFormat -choose_buffer_format(VkFormat format, VkImageAspectFlagBits aspect) -{ - assert(__builtin_popcount(aspect) == 1); - - /* vkCmdCopy* commands behave like memcpy. Therefore we choose - * compatable UINT formats for the source and destination image views. - * - * For the buffer, we go back to the original image format and get a - * the format as if it were linear. This way, for RGB formats, we get - * an RGB format here even if the tiled image is RGBA. XXX: This doesn't - * work if the buffer is the destination. - */ - enum isl_format linear_format = anv_get_isl_format(format, aspect, - VK_IMAGE_TILING_LINEAR, - NULL); - - return vk_format_for_size(isl_format_layouts[linear_format].bs); -} - -void anv_CmdCopyImage( - VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkImageCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, src_image, srcImage); - ANV_FROM_HANDLE(anv_image, dest_image, destImage); - struct anv_meta_saved_state saved_state; - - /* From the Vulkan 1.0 spec: - * - * vkCmdCopyImage can be used to copy image data between multisample - * images, but both images must have the same number of samples. 
- */ - assert(src_image->samples == dest_image->samples); - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - assert(pRegions[r].srcSubresource.aspectMask == - pRegions[r].dstSubresource.aspectMask); - - VkImageAspectFlags aspect = pRegions[r].srcSubresource.aspectMask; - - VkFormat src_format = choose_iview_format(src_image, aspect); - VkFormat dst_format = choose_iview_format(dest_image, aspect); - - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = srcImage, - .viewType = anv_meta_get_view_type(src_image), - .format = src_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].srcSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, - .layerCount = pRegions[r].dstSubresource.layerCount, - }, - }, - cmd_buffer, 0); - - const VkOffset3D dest_offset = { - .x = pRegions[r].dstOffset.x, - .y = pRegions[r].dstOffset.y, - .z = 0, - }; - - unsigned num_slices; - if (src_image->type == VK_IMAGE_TYPE_3D) { - assert(pRegions[r].srcSubresource.layerCount == 1 && - pRegions[r].dstSubresource.layerCount == 1); - num_slices = pRegions[r].extent.depth; - } else { - assert(pRegions[r].srcSubresource.layerCount == - pRegions[r].dstSubresource.layerCount); - assert(pRegions[r].extent.depth == 1); - num_slices = pRegions[r].dstSubresource.layerCount; - } - - const uint32_t dest_base_array_slice = - anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, - &pRegions[r].dstOffset); - - for (unsigned slice = 0; slice < num_slices; slice++) { - VkOffset3D src_offset = pRegions[r].srcOffset; - src_offset.z += slice; - - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = destImage, - .viewType = anv_meta_get_view_type(dest_image), - .format = dst_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].dstSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = dest_base_array_slice + slice, - .layerCount = 1 - }, - }, - cmd_buffer, 0); - - meta_emit_blit(cmd_buffer, - src_image, &src_iview, - src_offset, - pRegions[r].extent, - dest_image, &dest_iview, - dest_offset, - pRegions[r].extent, - VK_FILTER_NEAREST); - } - } - - meta_finish_blit(cmd_buffer, &saved_state); -} - -void anv_CmdBlitImage( - VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkImageBlit* pRegions, - VkFilter filter) - -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, src_image, srcImage); - ANV_FROM_HANDLE(anv_image, dest_image, destImage); - struct anv_meta_saved_state saved_state; - - /* From the Vulkan 1.0 spec: - * - * vkCmdBlitImage must not be used for multisampled source or - * destination images. Use vkCmdResolveImage for this purpose. 
- */ - assert(src_image->samples == 1); - assert(dest_image->samples == 1); - - anv_finishme("respect VkFilter"); - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = srcImage, - .viewType = anv_meta_get_view_type(src_image), - .format = src_image->vk_format, - .subresourceRange = { - .aspectMask = pRegions[r].srcSubresource.aspectMask, - .baseMipLevel = pRegions[r].srcSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer, - .layerCount = 1 - }, - }, - cmd_buffer, 0); - - const VkOffset3D dest_offset = { - .x = pRegions[r].dstOffsets[0].x, - .y = pRegions[r].dstOffsets[0].y, - .z = 0, - }; - - if (pRegions[r].dstOffsets[1].x < pRegions[r].dstOffsets[0].x || - pRegions[r].dstOffsets[1].y < pRegions[r].dstOffsets[0].y || - pRegions[r].srcOffsets[1].x < pRegions[r].srcOffsets[0].x || - pRegions[r].srcOffsets[1].y < pRegions[r].srcOffsets[0].y) - anv_finishme("FINISHME: Allow flipping in blits"); - - const VkExtent3D dest_extent = { - .width = pRegions[r].dstOffsets[1].x - pRegions[r].dstOffsets[0].x, - .height = pRegions[r].dstOffsets[1].y - pRegions[r].dstOffsets[0].y, - }; - - const VkExtent3D src_extent = { - .width = pRegions[r].srcOffsets[1].x - pRegions[r].srcOffsets[0].x, - .height = pRegions[r].srcOffsets[1].y - pRegions[r].srcOffsets[0].y, - }; - - const uint32_t dest_array_slice = - anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource, - &pRegions[r].dstOffsets[0]); - - if (pRegions[r].srcSubresource.layerCount > 1) - anv_finishme("FINISHME: copy multiple array layers"); - - if (pRegions[r].srcOffsets[0].z + 1 != pRegions[r].srcOffsets[1].z || - pRegions[r].dstOffsets[0].z + 1 != pRegions[r].dstOffsets[1].z) - anv_finishme("FINISHME: copy multiple depth layers"); - - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = destImage, - .viewType = anv_meta_get_view_type(dest_image), - .format = dest_image->vk_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].dstSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = dest_array_slice, - .layerCount = 1 - }, - }, - cmd_buffer, 0); - - meta_emit_blit(cmd_buffer, - src_image, &src_iview, - pRegions[r].srcOffsets[0], src_extent, - dest_image, &dest_iview, - dest_offset, dest_extent, - filter); - } - - meta_finish_blit(cmd_buffer, &saved_state); -} - -static struct anv_image * -make_image_for_buffer(VkDevice vk_device, VkBuffer vk_buffer, VkFormat format, - VkImageUsageFlags usage, - VkImageType image_type, - const VkAllocationCallbacks *alloc, - const VkBufferImageCopy *copy) -{ - ANV_FROM_HANDLE(anv_buffer, buffer, vk_buffer); - - VkExtent3D extent = copy->imageExtent; - if (copy->bufferRowLength) - extent.width = copy->bufferRowLength; - if (copy->bufferImageHeight) - extent.height = copy->bufferImageHeight; - extent.depth = 1; - extent = meta_region_extent_el(format, &extent); - - VkImageAspectFlags aspect = copy->imageSubresource.aspectMask; - VkFormat buffer_format = choose_buffer_format(format, aspect); - - VkImage vk_image; - VkResult result = anv_CreateImage(vk_device, - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = 
VK_IMAGE_TYPE_2D, - .format = buffer_format, - .extent = extent, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = usage, - .flags = 0, - }, alloc, &vk_image); - assert(result == VK_SUCCESS); - - ANV_FROM_HANDLE(anv_image, image, vk_image); - - /* We could use a vk call to bind memory, but that would require - * creating a dummy memory object etc. so there's really no point. - */ - image->bo = buffer->bo; - image->offset = buffer->offset + copy->bufferOffset; - - return image; -} - -void anv_CmdCopyBufferToImage( - VkCommandBuffer commandBuffer, - VkBuffer srcBuffer, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkBufferImageCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, dest_image, destImage); - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - struct anv_meta_saved_state saved_state; - - /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to - * VK_SAMPLE_COUNT_1_BIT." - */ - assert(dest_image->samples == 1); - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; - - VkFormat image_format = choose_iview_format(dest_image, aspect); - - struct anv_image *src_image = - make_image_for_buffer(vk_device, srcBuffer, dest_image->vk_format, - VK_IMAGE_USAGE_SAMPLED_BIT, - dest_image->type, &cmd_buffer->pool->alloc, - &pRegions[r]); - - const uint32_t dest_base_array_slice = - anv_meta_get_iview_layer(dest_image, &pRegions[r].imageSubresource, - &pRegions[r].imageOffset); - - unsigned num_slices_3d = pRegions[r].imageExtent.depth; - unsigned num_slices_array = pRegions[r].imageSubresource.layerCount; - unsigned slice_3d = 0; - unsigned slice_array = 0; - while (slice_3d < num_slices_3d && slice_array < num_slices_array) { - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = anv_image_to_handle(src_image), - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = src_image->vk_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }, - cmd_buffer, 0); - - uint32_t img_x = 0; - uint32_t img_y = 0; - uint32_t img_o = 0; - if (isl_format_is_compressed(dest_image->format->isl_format)) - isl_surf_get_image_intratile_offset_el(&cmd_buffer->device->isl_dev, - &dest_image->color_surface.isl, - pRegions[r].imageSubresource.mipLevel, - pRegions[r].imageSubresource.baseArrayLayer + slice_array, - pRegions[r].imageOffset.z + slice_3d, - &img_o, &img_x, &img_y); - - VkOffset3D dest_offset_el = meta_region_offset_el(dest_image, & pRegions[r].imageOffset); - dest_offset_el.x += img_x; - dest_offset_el.y += img_y; - dest_offset_el.z = 0; - - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = anv_image_to_handle(dest_image), - .viewType = anv_meta_get_view_type(dest_image), - .format = image_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].imageSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = dest_base_array_slice + - slice_array + slice_3d, - .layerCount = 1 - }, - }, - cmd_buffer, img_o); - - const 
VkExtent3D img_extent_el = meta_region_extent_el(dest_image->vk_format, - &pRegions[r].imageExtent); - - meta_emit_blit(cmd_buffer, - src_image, - &src_iview, - (VkOffset3D){0, 0, 0}, - img_extent_el, - dest_image, - &dest_iview, - dest_offset_el, - img_extent_el, - VK_FILTER_NEAREST); - - /* Once we've done the blit, all of the actual information about - * the image is embedded in the command buffer so we can just - * increment the offset directly in the image effectively - * re-binding it to different backing memory. - */ - src_image->offset += src_image->extent.width * - src_image->extent.height * - src_image->format->isl_layout->bs; - - if (dest_image->type == VK_IMAGE_TYPE_3D) - slice_3d++; - else - slice_array++; - } - - anv_DestroyImage(vk_device, anv_image_to_handle(src_image), - &cmd_buffer->pool->alloc); - } - - meta_finish_blit(cmd_buffer, &saved_state); -} - -void anv_CmdCopyImageToBuffer( - VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkBuffer destBuffer, - uint32_t regionCount, - const VkBufferImageCopy* pRegions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, src_image, srcImage); - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - struct anv_meta_saved_state saved_state; - - - /* The Vulkan 1.0 spec says "srcImage must have a sample count equal to - * VK_SAMPLE_COUNT_1_BIT." - */ - assert(src_image->samples == 1); - - meta_prepare_blit(cmd_buffer, &saved_state); - - for (unsigned r = 0; r < regionCount; r++) { - VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; - - VkFormat image_format = choose_iview_format(src_image, aspect); - - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = srcImage, - .viewType = anv_meta_get_view_type(src_image), - .format = image_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = pRegions[r].imageSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = pRegions[r].imageSubresource.baseArrayLayer, - .layerCount = pRegions[r].imageSubresource.layerCount, - }, - }, - cmd_buffer, 0); - - struct anv_image *dest_image = - make_image_for_buffer(vk_device, destBuffer, src_image->vk_format, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - src_image->type, &cmd_buffer->pool->alloc, - &pRegions[r]); - - unsigned num_slices; - if (src_image->type == VK_IMAGE_TYPE_3D) { - assert(pRegions[r].imageSubresource.layerCount == 1); - num_slices = pRegions[r].imageExtent.depth; - } else { - assert(pRegions[r].imageExtent.depth == 1); - num_slices = pRegions[r].imageSubresource.layerCount; - } - - for (unsigned slice = 0; slice < num_slices; slice++) { - VkOffset3D src_offset = pRegions[r].imageOffset; - src_offset.z += slice; - - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = anv_image_to_handle(dest_image), - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = dest_image->vk_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1 - }, - }, - cmd_buffer, 0); - - meta_emit_blit(cmd_buffer, - anv_image_from_handle(srcImage), - &src_iview, - src_offset, - pRegions[r].imageExtent, - dest_image, - &dest_iview, - (VkOffset3D) { 0, 0, 0 }, - pRegions[r].imageExtent, 
- VK_FILTER_NEAREST); - - /* Once we've done the blit, all of the actual information about - * the image is embedded in the command buffer so we can just - * increment the offset directly in the image effectively - * re-binding it to different backing memory. - */ - dest_image->offset += dest_image->extent.width * - dest_image->extent.height * - src_image->format->isl_layout->bs; - } - - anv_DestroyImage(vk_device, anv_image_to_handle(dest_image), - &cmd_buffer->pool->alloc); - } - - meta_finish_blit(cmd_buffer, &saved_state); -} - -void -anv_device_finish_meta_blit_state(struct anv_device *device) -{ - anv_DestroyRenderPass(anv_device_to_handle(device), - device->meta_state.blit.render_pass, - &device->meta_state.alloc); - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_1d_src, - &device->meta_state.alloc); - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_2d_src, - &device->meta_state.alloc); - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_3d_src, - &device->meta_state.alloc); - anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit.pipeline_layout, - &device->meta_state.alloc); - anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit.ds_layout, - &device->meta_state.alloc); -} - -VkResult -anv_device_init_meta_blit_state(struct anv_device *device) -{ - VkResult result; - - result = anv_CreateRenderPass(anv_device_to_handle(device), - &(VkRenderPassCreateInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = &(VkAttachmentDescription) { - .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - .subpassCount = 1, - .pSubpasses = &(VkSubpassDescription) { - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .colorAttachmentCount = 1, - .pColorAttachments = &(VkAttachmentReference) { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .pResolveAttachments = NULL, - .pDepthStencilAttachment = &(VkAttachmentReference) { - .attachment = VK_ATTACHMENT_UNUSED, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .preserveAttachmentCount = 1, - .pPreserveAttachments = (uint32_t[]) { 0 }, - }, - .dependencyCount = 0, - }, &device->meta_state.alloc, &device->meta_state.blit.render_pass); - if (result != VK_SUCCESS) - goto fail; - - /* We don't use a vertex shader for blitting, but instead build and pass - * the VUEs directly to the rasterization backend. However, we do need - * to provide GLSL source for the vertex shader so that the compiler - * does not dead-code our inputs. 
- */ - struct anv_shader_module vs = { - .nir = build_nir_vertex_shader(), - }; - - struct anv_shader_module fs_1d = { - .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D), - }; - - struct anv_shader_module fs_2d = { - .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D), - }; - - struct anv_shader_module fs_3d = { - .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D), - }; - - VkPipelineVertexInputStateCreateInfo vi_create_info = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .vertexBindingDescriptionCount = 2, - .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { - { - .binding = 0, - .stride = 0, - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX - }, - { - .binding = 1, - .stride = 5 * sizeof(float), - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX - }, - }, - .vertexAttributeDescriptionCount = 3, - .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { - { - /* VUE Header */ - .location = 0, - .binding = 0, - .format = VK_FORMAT_R32G32B32A32_UINT, - .offset = 0 - }, - { - /* Position */ - .location = 1, - .binding = 1, - .format = VK_FORMAT_R32G32_SFLOAT, - .offset = 0 - }, - { - /* Texture Coordinate */ - .location = 2, - .binding = 1, - .format = VK_FORMAT_R32G32B32_SFLOAT, - .offset = 8 - } - } - }; - - VkDescriptorSetLayoutCreateInfo ds_layout_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .bindingCount = 1, - .pBindings = (VkDescriptorSetLayoutBinding[]) { - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .pImmutableSamplers = NULL - }, - } - }; - result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device), - &ds_layout_info, - &device->meta_state.alloc, - &device->meta_state.blit.ds_layout); - if (result != VK_SUCCESS) - goto fail_render_pass; - - result = anv_CreatePipelineLayout(anv_device_to_handle(device), - &(VkPipelineLayoutCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .setLayoutCount = 1, - .pSetLayouts = &device->meta_state.blit.ds_layout, - }, - &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout); - if (result != VK_SUCCESS) - goto fail_descriptor_set_layout; - - VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = { - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = anv_shader_module_to_handle(&vs), - .pName = "main", - .pSpecializationInfo = NULL - }, { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! 
*/ - .pName = "main", - .pSpecializationInfo = NULL - }, - }; - - const VkGraphicsPipelineCreateInfo vk_pipeline_info = { - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .stageCount = ARRAY_SIZE(pipeline_shader_stages), - .pStages = pipeline_shader_stages, - .pVertexInputState = &vi_create_info, - .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .primitiveRestartEnable = false, - }, - .pViewportState = &(VkPipelineViewportStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .scissorCount = 1, - }, - .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .rasterizerDiscardEnable = false, - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE - }, - .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .rasterizationSamples = 1, - .sampleShadingEnable = false, - .pSampleMask = (VkSampleMask[]) { UINT32_MAX }, - }, - .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkPipelineColorBlendAttachmentState []) { - { .colorWriteMask = - VK_COLOR_COMPONENT_A_BIT | - VK_COLOR_COMPONENT_R_BIT | - VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT }, - } - }, - .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 9, - .pDynamicStates = (VkDynamicState[]) { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - VK_DYNAMIC_STATE_LINE_WIDTH, - VK_DYNAMIC_STATE_DEPTH_BIAS, - VK_DYNAMIC_STATE_BLEND_CONSTANTS, - VK_DYNAMIC_STATE_DEPTH_BOUNDS, - VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, - VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, - VK_DYNAMIC_STATE_STENCIL_REFERENCE, - }, - }, - .flags = 0, - .layout = device->meta_state.blit.pipeline_layout, - .renderPass = device->meta_state.blit.render_pass, - .subpass = 0, - }; - - const struct anv_graphics_pipeline_create_info anv_pipeline_info = { - .color_attachment_count = -1, - .use_repclear = false, - .disable_viewport = true, - .disable_scissor = true, - .disable_vs = true, - .use_rectlist = true - }; - - pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_1d); - result = anv_graphics_pipeline_create(anv_device_to_handle(device), - VK_NULL_HANDLE, - &vk_pipeline_info, &anv_pipeline_info, - &device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src); - if (result != VK_SUCCESS) - goto fail_pipeline_layout; - - pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d); - result = anv_graphics_pipeline_create(anv_device_to_handle(device), - VK_NULL_HANDLE, - &vk_pipeline_info, &anv_pipeline_info, - &device->meta_state.alloc, &device->meta_state.blit.pipeline_2d_src); - if (result != VK_SUCCESS) - goto fail_pipeline_1d; - - pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d); - result = anv_graphics_pipeline_create(anv_device_to_handle(device), - VK_NULL_HANDLE, - &vk_pipeline_info, &anv_pipeline_info, - &device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src); - if (result != VK_SUCCESS) - goto fail_pipeline_2d; - - ralloc_free(vs.nir); - 
ralloc_free(fs_1d.nir); - ralloc_free(fs_2d.nir); - ralloc_free(fs_3d.nir); - - return VK_SUCCESS; - - fail_pipeline_2d: - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_2d_src, - &device->meta_state.alloc); - - fail_pipeline_1d: - anv_DestroyPipeline(anv_device_to_handle(device), - device->meta_state.blit.pipeline_1d_src, - &device->meta_state.alloc); - - fail_pipeline_layout: - anv_DestroyPipelineLayout(anv_device_to_handle(device), - device->meta_state.blit.pipeline_layout, - &device->meta_state.alloc); - fail_descriptor_set_layout: - anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), - device->meta_state.blit.ds_layout, - &device->meta_state.alloc); - fail_render_pass: - anv_DestroyRenderPass(anv_device_to_handle(device), - device->meta_state.blit.render_pass, - &device->meta_state.alloc); - - ralloc_free(vs.nir); - ralloc_free(fs_1d.nir); - ralloc_free(fs_2d.nir); - ralloc_free(fs_3d.nir); - fail: - return result; -} diff --git a/src/vulkan/anv_meta_clear.c b/src/vulkan/anv_meta_clear.c deleted file mode 100644 index 739ae09..0000000 --- a/src/vulkan/anv_meta_clear.c +++ /dev/null @@ -1,1098 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "anv_meta.h" -#include "anv_private.h" -#include "nir/nir_builder.h" - -/** Vertex attributes for color clears. */ -struct color_clear_vattrs { - struct anv_vue_header vue_header; - float position[2]; /**< 3DPRIM_RECTLIST */ - VkClearColorValue color; -}; - -/** Vertex attributes for depthstencil clears. 
*/ -struct depthstencil_clear_vattrs { - struct anv_vue_header vue_header; - float position[2]; /*<< 3DPRIM_RECTLIST */ -}; - -static void -meta_clear_begin(struct anv_meta_saved_state *saved_state, - struct anv_cmd_buffer *cmd_buffer) -{ - anv_meta_save(saved_state, cmd_buffer, - (1 << VK_DYNAMIC_STATE_VIEWPORT) | - (1 << VK_DYNAMIC_STATE_SCISSOR) | - (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)); - - cmd_buffer->state.dynamic.viewport.count = 0; - cmd_buffer->state.dynamic.scissor.count = 0; -} - -static void -meta_clear_end(struct anv_meta_saved_state *saved_state, - struct anv_cmd_buffer *cmd_buffer) -{ - anv_meta_restore(saved_state, cmd_buffer); -} - -static void -build_color_shaders(struct nir_shader **out_vs, - struct nir_shader **out_fs, - uint32_t frag_output) -{ - nir_builder vs_b; - nir_builder fs_b; - - nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL); - nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL); - - vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs"); - fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs"); - - const struct glsl_type *position_type = glsl_vec4_type(); - const struct glsl_type *color_type = glsl_vec4_type(); - - nir_variable *vs_in_pos = - nir_variable_create(vs_b.shader, nir_var_shader_in, position_type, - "a_position"); - vs_in_pos->data.location = VERT_ATTRIB_GENERIC0; - - nir_variable *vs_out_pos = - nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, - "gl_Position"); - vs_out_pos->data.location = VARYING_SLOT_POS; - - nir_variable *vs_in_color = - nir_variable_create(vs_b.shader, nir_var_shader_in, color_type, - "a_color"); - vs_in_color->data.location = VERT_ATTRIB_GENERIC1; - - nir_variable *vs_out_color = - nir_variable_create(vs_b.shader, nir_var_shader_out, color_type, - "v_color"); - vs_out_color->data.location = VARYING_SLOT_VAR0; - vs_out_color->data.interpolation = INTERP_QUALIFIER_FLAT; - - nir_variable *fs_in_color = - nir_variable_create(fs_b.shader, nir_var_shader_in, color_type, - "v_color"); - fs_in_color->data.location = vs_out_color->data.location; - fs_in_color->data.interpolation = vs_out_color->data.interpolation; - - nir_variable *fs_out_color = - nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, - "f_color"); - fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output; - - nir_copy_var(&vs_b, vs_out_pos, vs_in_pos); - nir_copy_var(&vs_b, vs_out_color, vs_in_color); - nir_copy_var(&fs_b, fs_out_color, fs_in_color); - - *out_vs = vs_b.shader; - *out_fs = fs_b.shader; -} - -static VkResult -create_pipeline(struct anv_device *device, - uint32_t samples, - struct nir_shader *vs_nir, - struct nir_shader *fs_nir, - const VkPipelineVertexInputStateCreateInfo *vi_state, - const VkPipelineDepthStencilStateCreateInfo *ds_state, - const VkPipelineColorBlendStateCreateInfo *cb_state, - const VkAllocationCallbacks *alloc, - bool use_repclear, - struct anv_pipeline **pipeline) -{ - VkDevice device_h = anv_device_to_handle(device); - VkResult result; - - struct anv_shader_module vs_m = { .nir = vs_nir }; - struct anv_shader_module fs_m = { .nir = fs_nir }; - - VkPipeline pipeline_h = VK_NULL_HANDLE; - result = anv_graphics_pipeline_create(device_h, - VK_NULL_HANDLE, - &(VkGraphicsPipelineCreateInfo) { - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .stageCount = fs_nir ? 
2 : 1, - .pStages = (VkPipelineShaderStageCreateInfo[]) { - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = anv_shader_module_to_handle(&vs_m), - .pName = "main", - }, - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = anv_shader_module_to_handle(&fs_m), - .pName = "main", - }, - }, - .pVertexInputState = vi_state, - .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .primitiveRestartEnable = false, - }, - .pViewportState = &(VkPipelineViewportStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .pViewports = NULL, /* dynamic */ - .scissorCount = 1, - .pScissors = NULL, /* dynamic */ - }, - .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .rasterizerDiscardEnable = false, - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, - .depthBiasEnable = false, - }, - .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .rasterizationSamples = samples, - .sampleShadingEnable = false, - .pSampleMask = (VkSampleMask[]) { ~0 }, - .alphaToCoverageEnable = false, - .alphaToOneEnable = false, - }, - .pDepthStencilState = ds_state, - .pColorBlendState = cb_state, - .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { - /* The meta clear pipeline declares all state as dynamic. - * As a consequence, vkCmdBindPipeline writes no dynamic state - * to the cmd buffer. Therefore, at the end of the meta clear, - * we need only restore dynamic state was vkCmdSet. 
- */ - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 9, - .pDynamicStates = (VkDynamicState[]) { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - VK_DYNAMIC_STATE_LINE_WIDTH, - VK_DYNAMIC_STATE_DEPTH_BIAS, - VK_DYNAMIC_STATE_BLEND_CONSTANTS, - VK_DYNAMIC_STATE_DEPTH_BOUNDS, - VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, - VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, - VK_DYNAMIC_STATE_STENCIL_REFERENCE, - }, - }, - .flags = 0, - .renderPass = anv_render_pass_to_handle(&anv_meta_dummy_renderpass), - .subpass = 0, - }, - &(struct anv_graphics_pipeline_create_info) { - .color_attachment_count = MAX_RTS, - .use_repclear = use_repclear, - .disable_viewport = true, - .disable_vs = true, - .use_rectlist = true - }, - alloc, - &pipeline_h); - - ralloc_free(vs_nir); - ralloc_free(fs_nir); - - *pipeline = anv_pipeline_from_handle(pipeline_h); - - return result; -} - -static VkResult -create_color_pipeline(struct anv_device *device, - uint32_t samples, - uint32_t frag_output, - struct anv_pipeline **pipeline) -{ - struct nir_shader *vs_nir; - struct nir_shader *fs_nir; - build_color_shaders(&vs_nir, &fs_nir, frag_output); - - const VkPipelineVertexInputStateCreateInfo vi_state = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .vertexBindingDescriptionCount = 1, - .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { - { - .binding = 0, - .stride = sizeof(struct color_clear_vattrs), - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX - }, - }, - .vertexAttributeDescriptionCount = 3, - .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { - { - /* VUE Header */ - .location = 0, - .binding = 0, - .format = VK_FORMAT_R32G32B32A32_UINT, - .offset = offsetof(struct color_clear_vattrs, vue_header), - }, - { - /* Position */ - .location = 1, - .binding = 0, - .format = VK_FORMAT_R32G32_SFLOAT, - .offset = offsetof(struct color_clear_vattrs, position), - }, - { - /* Color */ - .location = 2, - .binding = 0, - .format = VK_FORMAT_R32G32B32A32_SFLOAT, - .offset = offsetof(struct color_clear_vattrs, color), - }, - }, - }; - - const VkPipelineDepthStencilStateCreateInfo ds_state = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .depthTestEnable = false, - .depthWriteEnable = false, - .depthBoundsTestEnable = false, - .stencilTestEnable = false, - }; - - VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = { 0 }; - blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState) { - .blendEnable = false, - .colorWriteMask = VK_COLOR_COMPONENT_A_BIT | - VK_COLOR_COMPONENT_R_BIT | - VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT, - }; - - const VkPipelineColorBlendStateCreateInfo cb_state = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .logicOpEnable = false, - .attachmentCount = MAX_RTS, - .pAttachments = blend_attachment_state - }; - - /* Disable repclear because we do not want the compiler to replace the - * shader. We need the shader to write to the specified color attachment, - * but the repclear shader writes to all color attachments. 
- */ - return - create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, - &cb_state, &device->meta_state.alloc, - /*use_repclear*/ false, pipeline); -} - -static void -destroy_pipeline(struct anv_device *device, struct anv_pipeline *pipeline) -{ - if (!pipeline) - return; - - ANV_CALL(DestroyPipeline)(anv_device_to_handle(device), - anv_pipeline_to_handle(pipeline), - &device->meta_state.alloc); -} - -void -anv_device_finish_meta_clear_state(struct anv_device *device) -{ - struct anv_meta_state *state = &device->meta_state; - - for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) { - for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) { - destroy_pipeline(device, state->clear[i].color_pipelines[j]); - } - - destroy_pipeline(device, state->clear[i].depth_only_pipeline); - destroy_pipeline(device, state->clear[i].stencil_only_pipeline); - destroy_pipeline(device, state->clear[i].depthstencil_pipeline); - } -} - -static void -emit_color_clear(struct anv_cmd_buffer *cmd_buffer, - const VkClearAttachment *clear_att, - const VkClearRect *clear_rect) -{ - struct anv_device *device = cmd_buffer->device; - const struct anv_subpass *subpass = cmd_buffer->state.subpass; - const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const uint32_t subpass_att = clear_att->colorAttachment; - const uint32_t pass_att = subpass->color_attachments[subpass_att]; - const struct anv_image_view *iview = fb->attachments[pass_att]; - const uint32_t samples = iview->image->samples; - const uint32_t samples_log2 = ffs(samples) - 1; - struct anv_pipeline *pipeline = - device->meta_state.clear[samples_log2].color_pipelines[subpass_att]; - VkClearColorValue clear_value = clear_att->clearValue.color; - - VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); - VkPipeline pipeline_h = anv_pipeline_to_handle(pipeline); - - assert(samples_log2 < ARRAY_SIZE(device->meta_state.clear)); - assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); - assert(clear_att->colorAttachment < subpass->color_count); - - const struct color_clear_vattrs vertex_data[3] = { - { - .vue_header = { 0 }, - .position = { - clear_rect->rect.offset.x, - clear_rect->rect.offset.y, - }, - .color = clear_value, - }, - { - .vue_header = { 0 }, - .position = { - clear_rect->rect.offset.x + clear_rect->rect.extent.width, - clear_rect->rect.offset.y, - }, - .color = clear_value, - }, - { - .vue_header = { 0 }, - .position = { - clear_rect->rect.offset.x + clear_rect->rect.extent.width, - clear_rect->rect.offset.y + clear_rect->rect.extent.height, - }, - .color = clear_value, - }, - }; - - struct anv_state state = - anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16); - - struct anv_buffer vertex_buffer = { - .device = device, - .size = sizeof(vertex_data), - .bo = &device->dynamic_state_block_pool.bo, - .offset = state.offset, - }; - - ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1, - (VkViewport[]) { - { - .x = 0, - .y = 0, - .width = fb->width, - .height = fb->height, - .minDepth = 0.0, - .maxDepth = 1.0, - }, - }); - - ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1, - (VkRect2D[]) { - { - .offset = { 0, 0 }, - .extent = { fb->width, fb->height }, - } - }); - - ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1, - (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) }, - (VkDeviceSize[]) { 0 }); - - if (cmd_buffer->state.pipeline != pipeline) { - ANV_CALL(CmdBindPipeline)(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_h); - } - - 
ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); -} - - -static void -build_depthstencil_shader(struct nir_shader **out_vs) -{ - nir_builder vs_b; - - nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL); - - vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs"); - - const struct glsl_type *position_type = glsl_vec4_type(); - - nir_variable *vs_in_pos = - nir_variable_create(vs_b.shader, nir_var_shader_in, position_type, - "a_position"); - vs_in_pos->data.location = VERT_ATTRIB_GENERIC0; - - nir_variable *vs_out_pos = - nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, - "gl_Position"); - vs_out_pos->data.location = VARYING_SLOT_POS; - - nir_copy_var(&vs_b, vs_out_pos, vs_in_pos); - - *out_vs = vs_b.shader; -} - -static VkResult -create_depthstencil_pipeline(struct anv_device *device, - VkImageAspectFlags aspects, - uint32_t samples, - struct anv_pipeline **pipeline) -{ - struct nir_shader *vs_nir; - - build_depthstencil_shader(&vs_nir); - - const VkPipelineVertexInputStateCreateInfo vi_state = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .vertexBindingDescriptionCount = 1, - .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { - { - .binding = 0, - .stride = sizeof(struct depthstencil_clear_vattrs), - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX - }, - }, - .vertexAttributeDescriptionCount = 2, - .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { - { - /* VUE Header */ - .location = 0, - .binding = 0, - .format = VK_FORMAT_R32G32B32A32_UINT, - .offset = offsetof(struct depthstencil_clear_vattrs, vue_header), - }, - { - /* Position */ - .location = 1, - .binding = 0, - .format = VK_FORMAT_R32G32_SFLOAT, - .offset = offsetof(struct depthstencil_clear_vattrs, position), - }, - }, - }; - - const VkPipelineDepthStencilStateCreateInfo ds_state = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .depthTestEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT), - .depthCompareOp = VK_COMPARE_OP_ALWAYS, - .depthWriteEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT), - .depthBoundsTestEnable = false, - .stencilTestEnable = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT), - .front = { - .passOp = VK_STENCIL_OP_REPLACE, - .compareOp = VK_COMPARE_OP_ALWAYS, - .writeMask = UINT32_MAX, - .reference = 0, /* dynamic */ - }, - .back = { 0 /* dont care */ }, - }; - - const VkPipelineColorBlendStateCreateInfo cb_state = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .logicOpEnable = false, - .attachmentCount = 0, - .pAttachments = NULL, - }; - - return create_pipeline(device, samples, vs_nir, NULL, &vi_state, &ds_state, - &cb_state, &device->meta_state.alloc, - /*use_repclear*/ true, pipeline); -} - -static void -emit_depthstencil_clear(struct anv_cmd_buffer *cmd_buffer, - const VkClearAttachment *clear_att, - const VkClearRect *clear_rect) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_meta_state *meta_state = &device->meta_state; - const struct anv_subpass *subpass = cmd_buffer->state.subpass; - const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const uint32_t pass_att = subpass->depth_stencil_attachment; - const struct anv_image_view *iview = fb->attachments[pass_att]; - const uint32_t samples = iview->image->samples; - const uint32_t samples_log2 = ffs(samples) - 1; - VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil; - VkImageAspectFlags aspects = clear_att->aspectMask; - - VkCommandBuffer 
cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); - - assert(samples_log2 < ARRAY_SIZE(meta_state->clear)); - assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT || - aspects == VK_IMAGE_ASPECT_STENCIL_BIT || - aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT)); - assert(pass_att != VK_ATTACHMENT_UNUSED); - - const struct depthstencil_clear_vattrs vertex_data[3] = { - { - .vue_header = { 0 }, - .position = { - clear_rect->rect.offset.x, - clear_rect->rect.offset.y, - }, - }, - { - .vue_header = { 0 }, - .position = { - clear_rect->rect.offset.x + clear_rect->rect.extent.width, - clear_rect->rect.offset.y, - }, - }, - { - .vue_header = { 0 }, - .position = { - clear_rect->rect.offset.x + clear_rect->rect.extent.width, - clear_rect->rect.offset.y + clear_rect->rect.extent.height, - }, - }, - }; - - struct anv_state state = - anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, sizeof(vertex_data), 16); - - struct anv_buffer vertex_buffer = { - .device = device, - .size = sizeof(vertex_data), - .bo = &device->dynamic_state_block_pool.bo, - .offset = state.offset, - }; - - ANV_CALL(CmdSetViewport)(cmd_buffer_h, 0, 1, - (VkViewport[]) { - { - .x = 0, - .y = 0, - .width = fb->width, - .height = fb->height, - - /* Ignored when clearing only stencil. */ - .minDepth = clear_value.depth, - .maxDepth = clear_value.depth, - }, - }); - - ANV_CALL(CmdSetScissor)(cmd_buffer_h, 0, 1, - (VkRect2D[]) { - { - .offset = { 0, 0 }, - .extent = { fb->width, fb->height }, - } - }); - - if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { - ANV_CALL(CmdSetStencilReference)(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, - clear_value.stencil); - } - - ANV_CALL(CmdBindVertexBuffers)(cmd_buffer_h, 0, 1, - (VkBuffer[]) { anv_buffer_to_handle(&vertex_buffer) }, - (VkDeviceSize[]) { 0 }); - - struct anv_pipeline *pipeline; - switch (aspects) { - case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: - pipeline = meta_state->clear[samples_log2].depthstencil_pipeline; - break; - case VK_IMAGE_ASPECT_DEPTH_BIT: - pipeline = meta_state->clear[samples_log2].depth_only_pipeline; - break; - case VK_IMAGE_ASPECT_STENCIL_BIT: - pipeline = meta_state->clear[samples_log2].stencil_only_pipeline; - break; - default: - unreachable("expected depth or stencil aspect"); - } - - if (cmd_buffer->state.pipeline != pipeline) { - ANV_CALL(CmdBindPipeline)(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, - anv_pipeline_to_handle(pipeline)); - } - - ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); -} - -VkResult -anv_device_init_meta_clear_state(struct anv_device *device) -{ - VkResult res; - struct anv_meta_state *state = &device->meta_state; - - zero(device->meta_state.clear); - - for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) { - uint32_t samples = 1 << i; - - for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) { - res = create_color_pipeline(device, samples, /* frag_output */ j, - &state->clear[i].color_pipelines[j]); - if (res != VK_SUCCESS) - goto fail; - } - - res = create_depthstencil_pipeline(device, - VK_IMAGE_ASPECT_DEPTH_BIT, samples, - &state->clear[i].depth_only_pipeline); - if (res != VK_SUCCESS) - goto fail; - - res = create_depthstencil_pipeline(device, - VK_IMAGE_ASPECT_STENCIL_BIT, samples, - &state->clear[i].stencil_only_pipeline); - if (res != VK_SUCCESS) - goto fail; - - res = create_depthstencil_pipeline(device, - VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT, samples, - &state->clear[i].depthstencil_pipeline); - if (res != VK_SUCCESS) - goto fail; - } - - return VK_SUCCESS; 
- -fail: - anv_device_finish_meta_clear_state(device); - return res; -} - -/** - * The parameters mean that same as those in vkCmdClearAttachments. - */ -static void -emit_clear(struct anv_cmd_buffer *cmd_buffer, - const VkClearAttachment *clear_att, - const VkClearRect *clear_rect) -{ - if (clear_att->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { - emit_color_clear(cmd_buffer, clear_att, clear_rect); - } else { - assert(clear_att->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT)); - emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect); - } -} - -static bool -subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer) -{ - const struct anv_cmd_state *cmd_state = &cmd_buffer->state; - uint32_t ds = cmd_state->subpass->depth_stencil_attachment; - - for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { - uint32_t a = cmd_state->subpass->color_attachments[i]; - if (cmd_state->attachments[a].pending_clear_aspects) { - return true; - } - } - - if (ds != VK_ATTACHMENT_UNUSED && - cmd_state->attachments[ds].pending_clear_aspects) { - return true; - } - - return false; -} - -/** - * Emit any pending attachment clears for the current subpass. - * - * @see anv_attachment_state::pending_clear_aspects - */ -void -anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_cmd_state *cmd_state = &cmd_buffer->state; - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - struct anv_meta_saved_state saved_state; - - if (!subpass_needs_clear(cmd_buffer)) - return; - - meta_clear_begin(&saved_state, cmd_buffer); - - if (cmd_state->framebuffer->layers > 1) - anv_finishme("clearing multi-layer framebuffer"); - - VkClearRect clear_rect = { - .rect = { - .offset = { 0, 0 }, - .extent = { fb->width, fb->height }, - }, - .baseArrayLayer = 0, - .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */ - }; - - for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { - uint32_t a = cmd_state->subpass->color_attachments[i]; - - if (!cmd_state->attachments[a].pending_clear_aspects) - continue; - - assert(cmd_state->attachments[a].pending_clear_aspects == - VK_IMAGE_ASPECT_COLOR_BIT); - - VkClearAttachment clear_att = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .colorAttachment = i, /* Use attachment index relative to subpass */ - .clearValue = cmd_state->attachments[a].clear_value, - }; - - emit_clear(cmd_buffer, &clear_att, &clear_rect); - cmd_state->attachments[a].pending_clear_aspects = 0; - } - - uint32_t ds = cmd_state->subpass->depth_stencil_attachment; - - if (ds != VK_ATTACHMENT_UNUSED && - cmd_state->attachments[ds].pending_clear_aspects) { - - VkClearAttachment clear_att = { - .aspectMask = cmd_state->attachments[ds].pending_clear_aspects, - .clearValue = cmd_state->attachments[ds].clear_value, - }; - - emit_clear(cmd_buffer, &clear_att, &clear_rect); - cmd_state->attachments[ds].pending_clear_aspects = 0; - } - - meta_clear_end(&saved_state, cmd_buffer); -} - -static void -anv_cmd_clear_image(struct anv_cmd_buffer *cmd_buffer, - struct anv_image *image, - VkImageLayout image_layout, - const VkClearValue *clear_value, - uint32_t range_count, - const VkImageSubresourceRange *ranges) -{ - VkDevice device_h = anv_device_to_handle(cmd_buffer->device); - - for (uint32_t r = 0; r < range_count; r++) { - const VkImageSubresourceRange *range = &ranges[r]; - - for (uint32_t l = 0; l < range->levelCount; ++l) { - for (uint32_t s = 0; s < range->layerCount; ++s) { - struct anv_image_view iview; - anv_image_view_init(&iview, 
cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = anv_image_to_handle(image), - .viewType = anv_meta_get_view_type(image), - .format = image->vk_format, - .subresourceRange = { - .aspectMask = range->aspectMask, - .baseMipLevel = range->baseMipLevel + l, - .levelCount = 1, - .baseArrayLayer = range->baseArrayLayer + s, - .layerCount = 1 - }, - }, - cmd_buffer, 0); - - VkFramebuffer fb; - anv_CreateFramebuffer(device_h, - &(VkFramebufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkImageView[]) { - anv_image_view_to_handle(&iview), - }, - .width = iview.extent.width, - .height = iview.extent.height, - .layers = 1 - }, - &cmd_buffer->pool->alloc, - &fb); - - VkAttachmentDescription att_desc = { - .format = iview.vk_format, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = image_layout, - .finalLayout = image_layout, - }; - - VkSubpassDescription subpass_desc = { - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .colorAttachmentCount = 0, - .pColorAttachments = NULL, - .pResolveAttachments = NULL, - .pDepthStencilAttachment = NULL, - .preserveAttachmentCount = 0, - .pPreserveAttachments = NULL, - }; - - const VkAttachmentReference att_ref = { - .attachment = 0, - .layout = image_layout, - }; - - if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { - subpass_desc.colorAttachmentCount = 1; - subpass_desc.pColorAttachments = &att_ref; - } else { - subpass_desc.pDepthStencilAttachment = &att_ref; - } - - VkRenderPass pass; - anv_CreateRenderPass(device_h, - &(VkRenderPassCreateInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = &att_desc, - .subpassCount = 1, - .pSubpasses = &subpass_desc, - }, - &cmd_buffer->pool->alloc, - &pass); - - ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer), - &(VkRenderPassBeginInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderArea = { - .offset = { 0, 0, }, - .extent = { - .width = iview.extent.width, - .height = iview.extent.height, - }, - }, - .renderPass = pass, - .framebuffer = fb, - .clearValueCount = 0, - .pClearValues = NULL, - }, - VK_SUBPASS_CONTENTS_INLINE); - - VkClearAttachment clear_att = { - .aspectMask = range->aspectMask, - .colorAttachment = 0, - .clearValue = *clear_value, - }; - - VkClearRect clear_rect = { - .rect = { - .offset = { 0, 0 }, - .extent = { iview.extent.width, iview.extent.height }, - }, - .baseArrayLayer = range->baseArrayLayer, - .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */ - }; - - emit_clear(cmd_buffer, &clear_att, &clear_rect); - - ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer)); - ANV_CALL(DestroyRenderPass)(device_h, pass, - &cmd_buffer->pool->alloc); - ANV_CALL(DestroyFramebuffer)(device_h, fb, - &cmd_buffer->pool->alloc); - } - } - } -} - -void anv_CmdClearColorImage( - VkCommandBuffer commandBuffer, - VkImage image_h, - VkImageLayout imageLayout, - const VkClearColorValue* pColor, - uint32_t rangeCount, - const VkImageSubresourceRange* pRanges) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, image, image_h); - struct anv_meta_saved_state saved_state; - - meta_clear_begin(&saved_state, cmd_buffer); - - anv_cmd_clear_image(cmd_buffer, image, 
imageLayout, - (const VkClearValue *) pColor, - rangeCount, pRanges); - - meta_clear_end(&saved_state, cmd_buffer); -} - -void anv_CmdClearDepthStencilImage( - VkCommandBuffer commandBuffer, - VkImage image_h, - VkImageLayout imageLayout, - const VkClearDepthStencilValue* pDepthStencil, - uint32_t rangeCount, - const VkImageSubresourceRange* pRanges) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_image, image, image_h); - struct anv_meta_saved_state saved_state; - - meta_clear_begin(&saved_state, cmd_buffer); - - anv_cmd_clear_image(cmd_buffer, image, imageLayout, - (const VkClearValue *) pDepthStencil, - rangeCount, pRanges); - - meta_clear_end(&saved_state, cmd_buffer); -} - -void anv_CmdClearAttachments( - VkCommandBuffer commandBuffer, - uint32_t attachmentCount, - const VkClearAttachment* pAttachments, - uint32_t rectCount, - const VkClearRect* pRects) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_meta_saved_state saved_state; - - meta_clear_begin(&saved_state, cmd_buffer); - - /* FINISHME: We can do better than this dumb loop. It thrashes too much - * state. - */ - for (uint32_t a = 0; a < attachmentCount; ++a) { - for (uint32_t r = 0; r < rectCount; ++r) { - emit_clear(cmd_buffer, &pAttachments[a], &pRects[r]); - } - } - - meta_clear_end(&saved_state, cmd_buffer); -} - -static void -do_buffer_fill(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *dest, uint64_t dest_offset, - int width, int height, VkFormat fill_format, uint32_t data) -{ - VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); - - VkImageCreateInfo image_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = fill_format, - .extent = { - .width = width, - .height = height, - .depth = 1, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - .tiling = VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - .flags = 0, - }; - - VkImage dest_image; - image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - anv_CreateImage(vk_device, &image_info, - &cmd_buffer->pool->alloc, &dest_image); - - /* We could use a vk call to bind memory, but that would require - * creating a dummy memory object etc. so there's really no point. 
- */ - anv_image_from_handle(dest_image)->bo = dest; - anv_image_from_handle(dest_image)->offset = dest_offset; - - const VkClearValue clear_value = { - .color = { - .uint32 = { data, data, data, data } - } - }; - - const VkImageSubresourceRange range = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }; - - anv_cmd_clear_image(cmd_buffer, anv_image_from_handle(dest_image), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, - &clear_value, 1, &range); -} - -void anv_CmdFillBuffer( - VkCommandBuffer commandBuffer, - VkBuffer dstBuffer, - VkDeviceSize dstOffset, - VkDeviceSize fillSize, - uint32_t data) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); - struct anv_meta_saved_state saved_state; - - meta_clear_begin(&saved_state, cmd_buffer); - - VkFormat format; - int bs; - if ((fillSize & 15) == 0 && (dstOffset & 15) == 0) { - format = VK_FORMAT_R32G32B32A32_UINT; - bs = 16; - } else if ((fillSize & 7) == 0 && (dstOffset & 15) == 0) { - format = VK_FORMAT_R32G32_UINT; - bs = 8; - } else { - assert((fillSize & 3) == 0 && (dstOffset & 3) == 0); - format = VK_FORMAT_R32_UINT; - bs = 4; - } - - /* This is maximum possible width/height our HW can handle */ - const uint64_t max_surface_dim = 1 << 14; - - /* First, we make a bunch of max-sized copies */ - const uint64_t max_fill_size = max_surface_dim * max_surface_dim * bs; - while (fillSize > max_fill_size) { - do_buffer_fill(cmd_buffer, dst_buffer->bo, - dst_buffer->offset + dstOffset, - max_surface_dim, max_surface_dim, format, data); - fillSize -= max_fill_size; - dstOffset += max_fill_size; - } - - uint64_t height = fillSize / (max_surface_dim * bs); - assert(height < max_surface_dim); - if (height != 0) { - const uint64_t rect_fill_size = height * max_surface_dim * bs; - do_buffer_fill(cmd_buffer, dst_buffer->bo, - dst_buffer->offset + dstOffset, - max_surface_dim, height, format, data); - fillSize -= rect_fill_size; - dstOffset += rect_fill_size; - } - - if (fillSize != 0) { - do_buffer_fill(cmd_buffer, dst_buffer->bo, - dst_buffer->offset + dstOffset, - fillSize / bs, 1, format, data); - } - - meta_clear_end(&saved_state, cmd_buffer); -} diff --git a/src/vulkan/anv_meta_resolve.c b/src/vulkan/anv_meta_resolve.c deleted file mode 100644 index ea5020c..0000000 --- a/src/vulkan/anv_meta_resolve.c +++ /dev/null @@ -1,867 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include - -#include "anv_meta.h" -#include "anv_private.h" -#include "nir/nir_builder.h" - -/** - * Vertex attributes used by all pipelines. - */ -struct vertex_attrs { - struct anv_vue_header vue_header; - float position[2]; /**< 3DPRIM_RECTLIST */ - float tex_position[2]; -}; - -static void -meta_resolve_save(struct anv_meta_saved_state *saved_state, - struct anv_cmd_buffer *cmd_buffer) -{ - anv_meta_save(saved_state, cmd_buffer, - (1 << VK_DYNAMIC_STATE_VIEWPORT) | - (1 << VK_DYNAMIC_STATE_SCISSOR)); - - cmd_buffer->state.dynamic.viewport.count = 0; - cmd_buffer->state.dynamic.scissor.count = 0; -} - -static void -meta_resolve_restore(struct anv_meta_saved_state *saved_state, - struct anv_cmd_buffer *cmd_buffer) -{ - anv_meta_restore(saved_state, cmd_buffer); -} - -static VkPipeline * -get_pipeline_h(struct anv_device *device, uint32_t samples) -{ - uint32_t i = ffs(samples) - 2; /* log2(samples) - 1 */ - - assert(samples >= 2); - assert(i < ARRAY_SIZE(device->meta_state.resolve.pipelines)); - - return &device->meta_state.resolve.pipelines[i]; -} - -static nir_shader * -build_nir_vs(void) -{ - const struct glsl_type *vec4 = glsl_vec4_type(); - - nir_builder b; - nir_variable *a_position; - nir_variable *v_position; - nir_variable *a_tex_position; - nir_variable *v_tex_position; - - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL); - b.shader->info.name = ralloc_strdup(b.shader, "meta_resolve_vs"); - - a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, - "a_position"); - a_position->data.location = VERT_ATTRIB_GENERIC0; - - v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, - "gl_Position"); - v_position->data.location = VARYING_SLOT_POS; - - a_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, - "a_tex_position"); - a_tex_position->data.location = VERT_ATTRIB_GENERIC1; - - v_tex_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, - "v_tex_position"); - v_tex_position->data.location = VARYING_SLOT_VAR0; - - nir_copy_var(&b, v_position, a_position); - nir_copy_var(&b, v_tex_position, a_tex_position); - - return b.shader; -} - -static nir_shader * -build_nir_fs(uint32_t num_samples) -{ - const struct glsl_type *vec4 = glsl_vec4_type(); - - const struct glsl_type *sampler2DMS = - glsl_sampler_type(GLSL_SAMPLER_DIM_MS, - /*is_shadow*/ false, - /*is_array*/ false, - GLSL_TYPE_FLOAT); - - nir_builder b; - nir_variable *u_tex; /* uniform sampler */ - nir_variable *v_position; /* vec4, varying fragment position */ - nir_variable *v_tex_position; /* vec4, varying texture coordinate */ - nir_variable *f_color; /* vec4, fragment output color */ - nir_ssa_def *accum; /* vec4, accumulation of sample values */ - - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL); - b.shader->info.name = ralloc_asprintf(b.shader, - "meta_resolve_fs_samples%02d", - num_samples); - - u_tex = nir_variable_create(b.shader, nir_var_uniform, sampler2DMS, - "u_tex"); - u_tex->data.descriptor_set = 0; - u_tex->data.binding = 0; - - v_position = nir_variable_create(b.shader, nir_var_shader_in, vec4, - "v_position"); - v_position->data.location = VARYING_SLOT_POS; - v_position->data.origin_upper_left = true; - - v_tex_position = 
nir_variable_create(b.shader, nir_var_shader_in, vec4, - "v_tex_position"); - v_tex_position->data.location = VARYING_SLOT_VAR0; - - f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4, - "f_color"); - f_color->data.location = FRAG_RESULT_DATA0; - - accum = nir_imm_vec4(&b, 0, 0, 0, 0); - - nir_ssa_def *tex_position_ivec = - nir_f2i(&b, nir_load_var(&b, v_tex_position)); - - for (uint32_t i = 0; i < num_samples; ++i) { - nir_tex_instr *tex; - - tex = nir_tex_instr_create(b.shader, /*num_srcs*/ 2); - tex->texture = nir_deref_var_create(tex, u_tex); - tex->sampler = nir_deref_var_create(tex, u_tex); - tex->sampler_dim = GLSL_SAMPLER_DIM_MS; - tex->op = nir_texop_txf_ms; - tex->src[0].src = nir_src_for_ssa(tex_position_ivec); - tex->src[0].src_type = nir_tex_src_coord; - tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i)); - tex->src[1].src_type = nir_tex_src_ms_index; - tex->dest_type = nir_type_float; - tex->is_array = false; - tex->coord_components = 3; - nir_ssa_dest_init(&tex->instr, &tex->dest, /*num_components*/ 4, "tex"); - nir_builder_instr_insert(&b, &tex->instr); - - accum = nir_fadd(&b, accum, &tex->dest.ssa); - } - - accum = nir_fdiv(&b, accum, nir_imm_float(&b, num_samples)); - nir_store_var(&b, f_color, accum, /*writemask*/ 4); - - return b.shader; -} - -static VkResult -create_pass(struct anv_device *device) -{ - VkResult result; - VkDevice device_h = anv_device_to_handle(device); - const VkAllocationCallbacks *alloc = &device->meta_state.alloc; - - result = anv_CreateRenderPass(device_h, - &(VkRenderPassCreateInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = &(VkAttachmentDescription) { - .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */ - .samples = 1, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - .subpassCount = 1, - .pSubpasses = &(VkSubpassDescription) { - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .colorAttachmentCount = 1, - .pColorAttachments = &(VkAttachmentReference) { - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }, - .pResolveAttachments = NULL, - .pDepthStencilAttachment = &(VkAttachmentReference) { - .attachment = VK_ATTACHMENT_UNUSED, - }, - .preserveAttachmentCount = 0, - .pPreserveAttachments = NULL, - }, - .dependencyCount = 0, - }, - alloc, - &device->meta_state.resolve.pass); - - return result; -} - -static VkResult -create_pipeline(struct anv_device *device, - uint32_t num_samples, - VkShaderModule vs_module_h) -{ - VkResult result; - VkDevice device_h = anv_device_to_handle(device); - - struct anv_shader_module fs_module = { - .nir = build_nir_fs(num_samples), - }; - - if (!fs_module.nir) { - /* XXX: Need more accurate error */ - result = VK_ERROR_OUT_OF_HOST_MEMORY; - goto cleanup; - } - - result = anv_graphics_pipeline_create(device_h, - VK_NULL_HANDLE, - &(VkGraphicsPipelineCreateInfo) { - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .stageCount = 2, - .pStages = (VkPipelineShaderStageCreateInfo[]) { - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = vs_module_h, - .pName = "main", - }, - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = anv_shader_module_to_handle(&fs_module), - .pName = "main", - }, - }, - .pVertexInputState = 
&(VkPipelineVertexInputStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .vertexBindingDescriptionCount = 1, - .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { - { - .binding = 0, - .stride = sizeof(struct vertex_attrs), - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX - }, - }, - .vertexAttributeDescriptionCount = 3, - .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { - { - /* VUE Header */ - .location = 0, - .binding = 0, - .format = VK_FORMAT_R32G32B32A32_UINT, - .offset = offsetof(struct vertex_attrs, vue_header), - }, - { - /* Position */ - .location = 1, - .binding = 0, - .format = VK_FORMAT_R32G32_SFLOAT, - .offset = offsetof(struct vertex_attrs, position), - }, - { - /* Texture Coordinate */ - .location = 2, - .binding = 0, - .format = VK_FORMAT_R32G32_SFLOAT, - .offset = offsetof(struct vertex_attrs, tex_position), - }, - }, - }, - .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .primitiveRestartEnable = false, - }, - .pViewportState = &(VkPipelineViewportStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .viewportCount = 1, - .scissorCount = 1, - }, - .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .depthClampEnable = false, - .rasterizerDiscardEnable = false, - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, - }, - .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .rasterizationSamples = 1, - .sampleShadingEnable = false, - .pSampleMask = (VkSampleMask[]) { 0x1 }, - .alphaToCoverageEnable = false, - .alphaToOneEnable = false, - }, - .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .logicOpEnable = false, - .attachmentCount = 1, - .pAttachments = (VkPipelineColorBlendAttachmentState []) { - { - .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | - VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | - VK_COLOR_COMPONENT_A_BIT, - }, - }, - }, - .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .dynamicStateCount = 2, - .pDynamicStates = (VkDynamicState[]) { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - }, - }, - .layout = device->meta_state.resolve.pipeline_layout, - .renderPass = device->meta_state.resolve.pass, - .subpass = 0, - }, - &(struct anv_graphics_pipeline_create_info) { - .color_attachment_count = -1, - .use_repclear = false, - .disable_viewport = true, - .disable_scissor = true, - .disable_vs = true, - .use_rectlist = true - }, - &device->meta_state.alloc, - get_pipeline_h(device, num_samples)); - if (result != VK_SUCCESS) - goto cleanup; - - goto cleanup; - -cleanup: - ralloc_free(fs_module.nir); - return result; -} - -void -anv_device_finish_meta_resolve_state(struct anv_device *device) -{ - struct anv_meta_state *state = &device->meta_state; - VkDevice device_h = anv_device_to_handle(device); - VkRenderPass pass_h = device->meta_state.resolve.pass; - VkPipelineLayout pipeline_layout_h = device->meta_state.resolve.pipeline_layout; - VkDescriptorSetLayout ds_layout_h = device->meta_state.resolve.ds_layout; - const 
VkAllocationCallbacks *alloc = &device->meta_state.alloc; - - if (pass_h) - ANV_CALL(DestroyRenderPass)(device_h, pass_h, - &device->meta_state.alloc); - - if (pipeline_layout_h) - ANV_CALL(DestroyPipelineLayout)(device_h, pipeline_layout_h, alloc); - - if (ds_layout_h) - ANV_CALL(DestroyDescriptorSetLayout)(device_h, ds_layout_h, alloc); - - for (uint32_t i = 0; i < ARRAY_SIZE(state->resolve.pipelines); ++i) { - VkPipeline pipeline_h = state->resolve.pipelines[i]; - - if (pipeline_h) { - ANV_CALL(DestroyPipeline)(device_h, pipeline_h, alloc); - } - } -} - -VkResult -anv_device_init_meta_resolve_state(struct anv_device *device) -{ - VkResult res = VK_SUCCESS; - VkDevice device_h = anv_device_to_handle(device); - const VkAllocationCallbacks *alloc = &device->meta_state.alloc; - - const isl_sample_count_mask_t sample_count_mask = - isl_device_get_sample_counts(&device->isl_dev); - - zero(device->meta_state.resolve); - - struct anv_shader_module vs_module = { .nir = build_nir_vs() }; - if (!vs_module.nir) { - /* XXX: Need more accurate error */ - res = VK_ERROR_OUT_OF_HOST_MEMORY; - goto fail; - } - - VkShaderModule vs_module_h = anv_shader_module_to_handle(&vs_module); - - res = anv_CreateDescriptorSetLayout(device_h, - &(VkDescriptorSetLayoutCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .bindingCount = 1, - .pBindings = (VkDescriptorSetLayoutBinding[]) { - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - }, - }, - }, - alloc, - &device->meta_state.resolve.ds_layout); - if (res != VK_SUCCESS) - goto fail; - - res = anv_CreatePipelineLayout(device_h, - &(VkPipelineLayoutCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .setLayoutCount = 1, - .pSetLayouts = (VkDescriptorSetLayout[]) { - device->meta_state.resolve.ds_layout, - }, - }, - alloc, - &device->meta_state.resolve.pipeline_layout); - if (res != VK_SUCCESS) - goto fail; - - res = create_pass(device); - if (res != VK_SUCCESS) - goto fail; - - for (uint32_t i = 0; - i < ARRAY_SIZE(device->meta_state.resolve.pipelines); ++i) { - - uint32_t sample_count = 1 << (1 + i); - if (!(sample_count_mask & sample_count)) - continue; - - res = create_pipeline(device, sample_count, vs_module_h); - if (res != VK_SUCCESS) - goto fail; - } - - goto cleanup; - -fail: - anv_device_finish_meta_resolve_state(device); - -cleanup: - ralloc_free(vs_module.nir); - - return res; -} - -static void -emit_resolve(struct anv_cmd_buffer *cmd_buffer, - struct anv_image_view *src_iview, - const VkOffset2D *src_offset, - struct anv_image_view *dest_iview, - const VkOffset2D *dest_offset, - const VkExtent2D *resolve_extent) -{ - struct anv_device *device = cmd_buffer->device; - VkDevice device_h = anv_device_to_handle(device); - VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer); - const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct anv_image *src_image = src_iview->image; - VkDescriptorPool dummy_desc_pool_h = (VkDescriptorPool) 1; - - const struct vertex_attrs vertex_data[3] = { - { - .vue_header = {0}, - .position = { - dest_offset->x + resolve_extent->width, - dest_offset->y + resolve_extent->height, - }, - .tex_position = { - src_offset->x + resolve_extent->width, - src_offset->y + resolve_extent->height, - }, - }, - { - .vue_header = {0}, - .position = { - dest_offset->x, - dest_offset->y + resolve_extent->height, - }, - .tex_position = { - src_offset->x, - 
src_offset->y + resolve_extent->height, - }, - }, - { - .vue_header = {0}, - .position = { - dest_offset->x, - dest_offset->y, - }, - .tex_position = { - src_offset->x, - src_offset->y, - }, - }, - }; - - struct anv_state vertex_mem = - anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data, - sizeof(vertex_data), 16); - - struct anv_buffer vertex_buffer = { - .device = device, - .size = sizeof(vertex_data), - .bo = &cmd_buffer->dynamic_state_stream.block_pool->bo, - .offset = vertex_mem.offset, - }; - - VkBuffer vertex_buffer_h = anv_buffer_to_handle(&vertex_buffer); - - anv_CmdBindVertexBuffers(cmd_buffer_h, - /*firstBinding*/ 0, - /*bindingCount*/ 1, - (VkBuffer[]) { vertex_buffer_h }, - (VkDeviceSize[]) { 0 }); - - VkSampler sampler_h; - ANV_CALL(CreateSampler)(device_h, - &(VkSamplerCreateInfo) { - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .magFilter = VK_FILTER_NEAREST, - .minFilter = VK_FILTER_NEAREST, - .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, - .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - .mipLodBias = 0.0, - .anisotropyEnable = false, - .compareEnable = false, - .minLod = 0.0, - .maxLod = 0.0, - .unnormalizedCoordinates = false, - }, - &cmd_buffer->pool->alloc, - &sampler_h); - - VkDescriptorSet desc_set_h; - anv_AllocateDescriptorSets(device_h, - &(VkDescriptorSetAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = dummy_desc_pool_h, - .descriptorSetCount = 1, - .pSetLayouts = (VkDescriptorSetLayout[]) { - device->meta_state.resolve.ds_layout, - }, - }, - &desc_set_h); - - ANV_FROM_HANDLE(anv_descriptor_set, desc_set, desc_set_h); - - anv_UpdateDescriptorSets(device_h, - /*writeCount*/ 1, - (VkWriteDescriptorSet[]) { - { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = desc_set_h, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .pImageInfo = (VkDescriptorImageInfo[]) { - { - .sampler = sampler_h, - .imageView = anv_image_view_to_handle(src_iview), - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }, - }, - }, - }, - /*copyCount*/ 0, - /*copies */ NULL); - - ANV_CALL(CmdSetViewport)(cmd_buffer_h, - /*firstViewport*/ 0, - /*viewportCount*/ 1, - (VkViewport[]) { - { - .x = 0, - .y = 0, - .width = fb->width, - .height = fb->height, - .minDepth = 0.0, - .maxDepth = 1.0, - }, - }); - - ANV_CALL(CmdSetScissor)(cmd_buffer_h, - /*firstScissor*/ 0, - /*scissorCount*/ 1, - (VkRect2D[]) { - { - .offset = { 0, 0 }, - .extent = (VkExtent2D) { fb->width, fb->height }, - }, - }); - - VkPipeline pipeline_h = *get_pipeline_h(device, src_image->samples); - ANV_FROM_HANDLE(anv_pipeline, pipeline, pipeline_h); - - if (cmd_buffer->state.pipeline != pipeline) { - anv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_h); - } - - anv_CmdBindDescriptorSets(cmd_buffer_h, - VK_PIPELINE_BIND_POINT_GRAPHICS, - device->meta_state.resolve.pipeline_layout, - /*firstSet*/ 0, - /* setCount */ 1, - (VkDescriptorSet[]) { - desc_set_h, - }, - /*copyCount*/ 0, - /*copies */ NULL); - - ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0); - - /* All objects below are consumed by the draw call. We may safely destroy - * them. 
- */ - anv_descriptor_set_destroy(device, desc_set); - anv_DestroySampler(device_h, sampler_h, - &cmd_buffer->pool->alloc); -} - -void anv_CmdResolveImage( - VkCommandBuffer cmd_buffer_h, - VkImage src_image_h, - VkImageLayout src_image_layout, - VkImage dest_image_h, - VkImageLayout dest_image_layout, - uint32_t region_count, - const VkImageResolve* regions) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmd_buffer_h); - ANV_FROM_HANDLE(anv_image, src_image, src_image_h); - ANV_FROM_HANDLE(anv_image, dest_image, dest_image_h); - struct anv_device *device = cmd_buffer->device; - struct anv_meta_saved_state state; - VkDevice device_h = anv_device_to_handle(device); - - meta_resolve_save(&state, cmd_buffer); - - assert(src_image->samples > 1); - assert(dest_image->samples == 1); - - if (src_image->samples >= 16) { - /* See commit aa3f9aaf31e9056a255f9e0472ebdfdaa60abe54 for the - * glBlitFramebuffer workaround for samples >= 16. - */ - anv_finishme("vkCmdResolveImage: need interpolation workaround when " - "samples >= 16"); - } - - if (src_image->array_size > 1) - anv_finishme("vkCmdResolveImage: multisample array images"); - - for (uint32_t r = 0; r < region_count; ++r) { - const VkImageResolve *region = ®ions[r]; - - /* From the Vulkan 1.0 spec: - * - * - The aspectMask member of srcSubresource and dstSubresource must - * only contain VK_IMAGE_ASPECT_COLOR_BIT - * - * - The layerCount member of srcSubresource and dstSubresource must - * match - */ - assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); - assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); - assert(region->srcSubresource.layerCount == - region->dstSubresource.layerCount); - - const uint32_t src_base_layer = - anv_meta_get_iview_layer(src_image, ®ion->srcSubresource, - ®ion->srcOffset); - - const uint32_t dest_base_layer = - anv_meta_get_iview_layer(dest_image, ®ion->dstSubresource, - ®ion->dstOffset); - - for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; - ++layer) { - - struct anv_image_view src_iview; - anv_image_view_init(&src_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = src_image_h, - .viewType = anv_meta_get_view_type(src_image), - .format = src_image->format->vk_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = region->srcSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = src_base_layer + layer, - .layerCount = 1, - }, - }, - cmd_buffer, 0); - - struct anv_image_view dest_iview; - anv_image_view_init(&dest_iview, cmd_buffer->device, - &(VkImageViewCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = dest_image_h, - .viewType = anv_meta_get_view_type(dest_image), - .format = dest_image->format->vk_format, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = region->dstSubresource.mipLevel, - .levelCount = 1, - .baseArrayLayer = dest_base_layer + layer, - .layerCount = 1, - }, - }, - cmd_buffer, 0); - - VkFramebuffer fb_h; - anv_CreateFramebuffer(device_h, - &(VkFramebufferCreateInfo) { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkImageView[]) { - anv_image_view_to_handle(&dest_iview), - }, - .width = anv_minify(dest_image->extent.width, - region->dstSubresource.mipLevel), - .height = anv_minify(dest_image->extent.height, - region->dstSubresource.mipLevel), - .layers = 1 - }, - &cmd_buffer->pool->alloc, - &fb_h); - - 
ANV_CALL(CmdBeginRenderPass)(cmd_buffer_h, - &(VkRenderPassBeginInfo) { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderPass = device->meta_state.resolve.pass, - .framebuffer = fb_h, - .renderArea = { - .offset = { - region->dstOffset.x, - region->dstOffset.y, - }, - .extent = { - region->extent.width, - region->extent.height, - } - }, - .clearValueCount = 0, - .pClearValues = NULL, - }, - VK_SUBPASS_CONTENTS_INLINE); - - emit_resolve(cmd_buffer, - &src_iview, - &(VkOffset2D) { - .x = region->srcOffset.x, - .y = region->srcOffset.y, - }, - &dest_iview, - &(VkOffset2D) { - .x = region->dstOffset.x, - .y = region->dstOffset.y, - }, - &(VkExtent2D) { - .width = region->extent.width, - .height = region->extent.height, - }); - - ANV_CALL(CmdEndRenderPass)(cmd_buffer_h); - - anv_DestroyFramebuffer(device_h, fb_h, - &cmd_buffer->pool->alloc); - } - } - - meta_resolve_restore(&state, cmd_buffer); -} - -/** - * Emit any needed resolves for the current subpass. - */ -void -anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - struct anv_subpass *subpass = cmd_buffer->state.subpass; - struct anv_meta_saved_state saved_state; - - /* FINISHME(perf): Skip clears for resolve attachments. - * - * From the Vulkan 1.0 spec: - * - * If the first use of an attachment in a render pass is as a resolve - * attachment, then the loadOp is effectively ignored as the resolve is - * guaranteed to overwrite all pixels in the render area. - */ - - if (!subpass->has_resolve) - return; - - meta_resolve_save(&saved_state, cmd_buffer); - - for (uint32_t i = 0; i < subpass->color_count; ++i) { - uint32_t src_att = subpass->color_attachments[i]; - uint32_t dest_att = subpass->resolve_attachments[i]; - - if (dest_att == VK_ATTACHMENT_UNUSED) - continue; - - struct anv_image_view *src_iview = fb->attachments[src_att]; - struct anv_image_view *dest_iview = fb->attachments[dest_att]; - - struct anv_subpass resolve_subpass = { - .color_count = 1, - .color_attachments = (uint32_t[]) { dest_att }, - .depth_stencil_attachment = VK_ATTACHMENT_UNUSED, - }; - - anv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass); - - /* Subpass resolves must respect the render area. We can ignore the - * render area here because vkCmdBeginRenderPass set the render area - * with 3DSTATE_DRAWING_RECTANGLE. - * - * XXX(chadv): Does the hardware really respect - * 3DSTATE_DRAWING_RECTANGLE when draing a 3DPRIM_RECTLIST? 
- */ - emit_resolve(cmd_buffer, - src_iview, - &(VkOffset2D) { 0, 0 }, - dest_iview, - &(VkOffset2D) { 0, 0 }, - &(VkExtent2D) { fb->width, fb->height }); - } - - cmd_buffer->state.subpass = subpass; - meta_resolve_restore(&saved_state, cmd_buffer); -} diff --git a/src/vulkan/anv_nir.h b/src/vulkan/anv_nir.h deleted file mode 100644 index a7ea3eb..0000000 --- a/src/vulkan/anv_nir.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#pragma once - -#include "nir/nir.h" -#include "anv_private.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar); - -void anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, - nir_shader *shader, - struct brw_stage_prog_data *prog_data); -void anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, - nir_shader *shader, - struct brw_stage_prog_data *prog_data); - -#ifdef __cplusplus -} -#endif diff --git a/src/vulkan/anv_nir_apply_dynamic_offsets.c b/src/vulkan/anv_nir_apply_dynamic_offsets.c deleted file mode 100644 index e71a8ff..0000000 --- a/src/vulkan/anv_nir_apply_dynamic_offsets.c +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "anv_nir.h" -#include "nir/nir_builder.h" - -struct apply_dynamic_offsets_state { - nir_shader *shader; - nir_builder builder; - - const struct anv_pipeline_layout *layout; - - uint32_t indices_start; -}; - -static bool -apply_dynamic_offsets_block(nir_block *block, void *void_state) -{ - struct apply_dynamic_offsets_state *state = void_state; - struct anv_descriptor_set_layout *set_layout; - - nir_builder *b = &state->builder; - - nir_foreach_instr_safe(block, instr) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - - unsigned block_idx_src; - switch (intrin->intrinsic) { - case nir_intrinsic_load_ubo: - case nir_intrinsic_load_ssbo: - block_idx_src = 0; - break; - case nir_intrinsic_store_ssbo: - block_idx_src = 1; - break; - default: - continue; /* the loop */ - } - - nir_instr *res_instr = intrin->src[block_idx_src].ssa->parent_instr; - assert(res_instr->type == nir_instr_type_intrinsic); - nir_intrinsic_instr *res_intrin = nir_instr_as_intrinsic(res_instr); - assert(res_intrin->intrinsic == nir_intrinsic_vulkan_resource_index); - - unsigned set = res_intrin->const_index[0]; - unsigned binding = res_intrin->const_index[1]; - - set_layout = state->layout->set[set].layout; - if (set_layout->binding[binding].dynamic_offset_index < 0) - continue; - - b->cursor = nir_before_instr(&intrin->instr); - - /* First, we need to generate the uniform load for the buffer offset */ - uint32_t index = state->layout->set[set].dynamic_offset_start + - set_layout->binding[binding].dynamic_offset_index; - - nir_intrinsic_instr *offset_load = - nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform); - offset_load->num_components = 2; - offset_load->const_index[0] = state->indices_start + index * 8; - offset_load->src[0] = nir_src_for_ssa(nir_imul(b, res_intrin->src[0].ssa, - nir_imm_int(b, 8))); - - nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, NULL); - nir_builder_instr_insert(b, &offset_load->instr); - - nir_src *offset_src = nir_get_io_offset_src(intrin); - nir_ssa_def *new_offset = nir_iadd(b, offset_src->ssa, - &offset_load->dest.ssa); - - /* In order to avoid out-of-bounds access, we predicate */ - nir_ssa_def *pred = nir_uge(b, nir_channel(b, &offset_load->dest.ssa, 1), - offset_src->ssa); - nir_if *if_stmt = nir_if_create(b->shader); - if_stmt->condition = nir_src_for_ssa(pred); - nir_cf_node_insert(b->cursor, &if_stmt->cf_node); - - nir_instr_remove(&intrin->instr); - *offset_src = nir_src_for_ssa(new_offset); - nir_instr_insert_after_cf_list(&if_stmt->then_list, &intrin->instr); - - if (intrin->intrinsic != nir_intrinsic_store_ssbo) { - /* It's a load, we need a phi node */ - nir_phi_instr *phi = nir_phi_instr_create(b->shader); - nir_ssa_dest_init(&phi->instr, &phi->dest, - intrin->num_components, NULL); - - nir_phi_src *src1 = ralloc(phi, nir_phi_src); - struct exec_node *tnode = exec_list_get_tail(&if_stmt->then_list); - src1->pred = exec_node_data(nir_block, tnode, cf_node.node); - src1->src = nir_src_for_ssa(&intrin->dest.ssa); - exec_list_push_tail(&phi->srcs, &src1->node); - - b->cursor = nir_after_cf_list(&if_stmt->else_list); - nir_ssa_def *zero = nir_build_imm(b, intrin->num_components, - (nir_const_value) { .u = { 0, 0, 0, 0 } }); - - nir_phi_src *src2 = ralloc(phi, nir_phi_src); - struct exec_node *enode = exec_list_get_tail(&if_stmt->else_list); - src2->pred = exec_node_data(nir_block, enode, cf_node.node); - src2->src = nir_src_for_ssa(zero); - 
exec_list_push_tail(&phi->srcs, &src2->node); - - assert(intrin->dest.is_ssa); - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_src_for_ssa(&phi->dest.ssa)); - - nir_instr_insert_after_cf(&if_stmt->cf_node, &phi->instr); - } - } - - return true; -} - -void -anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, - nir_shader *shader, - struct brw_stage_prog_data *prog_data) -{ - struct apply_dynamic_offsets_state state = { - .shader = shader, - .layout = pipeline->layout, - .indices_start = shader->num_uniforms, - }; - - if (!state.layout || !state.layout->stage[shader->stage].has_dynamic_offsets) - return; - - nir_foreach_function(shader, function) { - if (function->impl) { - nir_builder_init(&state.builder, function->impl); - nir_foreach_block(function->impl, apply_dynamic_offsets_block, &state); - nir_metadata_preserve(function->impl, nir_metadata_block_index | - nir_metadata_dominance); - } - } - - struct anv_push_constants *null_data = NULL; - for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) { - prog_data->param[i * 2 + shader->num_uniforms] = - (const union gl_constant_value *)&null_data->dynamic[i].offset; - prog_data->param[i * 2 + 1 + shader->num_uniforms] = - (const union gl_constant_value *)&null_data->dynamic[i].range; - } - - shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 8; -} diff --git a/src/vulkan/anv_nir_apply_pipeline_layout.c b/src/vulkan/anv_nir_apply_pipeline_layout.c deleted file mode 100644 index c58a938..0000000 --- a/src/vulkan/anv_nir_apply_pipeline_layout.c +++ /dev/null @@ -1,394 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "anv_nir.h" -#include "program/prog_parameter.h" -#include "nir/nir_builder.h" - -struct apply_pipeline_layout_state { - nir_shader *shader; - nir_builder builder; - - struct { - BITSET_WORD *used; - uint8_t *surface_offsets; - uint8_t *sampler_offsets; - uint8_t *image_offsets; - } set[MAX_SETS]; -}; - -static void -add_binding(struct apply_pipeline_layout_state *state, - uint32_t set, uint32_t binding) -{ - BITSET_SET(state->set[set].used, binding); -} - -static void -add_var_binding(struct apply_pipeline_layout_state *state, nir_variable *var) -{ - add_binding(state, var->data.descriptor_set, var->data.binding); -} - -static bool -get_used_bindings_block(nir_block *block, void *void_state) -{ - struct apply_pipeline_layout_state *state = void_state; - - nir_foreach_instr_safe(block, instr) { - switch (instr->type) { - case nir_instr_type_intrinsic: { - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - switch (intrin->intrinsic) { - case nir_intrinsic_vulkan_resource_index: - add_binding(state, nir_intrinsic_desc_set(intrin), - nir_intrinsic_binding(intrin)); - break; - - case nir_intrinsic_image_load: - case nir_intrinsic_image_store: - case nir_intrinsic_image_atomic_add: - case nir_intrinsic_image_atomic_min: - case nir_intrinsic_image_atomic_max: - case nir_intrinsic_image_atomic_and: - case nir_intrinsic_image_atomic_or: - case nir_intrinsic_image_atomic_xor: - case nir_intrinsic_image_atomic_exchange: - case nir_intrinsic_image_atomic_comp_swap: - case nir_intrinsic_image_size: - case nir_intrinsic_image_samples: - add_var_binding(state, intrin->variables[0]->var); - break; - - default: - break; - } - break; - } - case nir_instr_type_tex: { - nir_tex_instr *tex = nir_instr_as_tex(instr); - assert(tex->texture); - add_var_binding(state, tex->texture->var); - if (tex->sampler) - add_var_binding(state, tex->sampler->var); - break; - } - default: - continue; - } - } - - return true; -} - -static void -lower_res_index_intrinsic(nir_intrinsic_instr *intrin, - struct apply_pipeline_layout_state *state) -{ - nir_builder *b = &state->builder; - - b->cursor = nir_before_instr(&intrin->instr); - - uint32_t set = nir_intrinsic_desc_set(intrin); - uint32_t binding = nir_intrinsic_binding(intrin); - - uint32_t surface_index = state->set[set].surface_offsets[binding]; - - nir_const_value *const_block_idx = - nir_src_as_const_value(intrin->src[0]); - - nir_ssa_def *block_index; - if (const_block_idx) { - block_index = nir_imm_int(b, surface_index + const_block_idx->u[0]); - } else { - block_index = nir_iadd(b, nir_imm_int(b, surface_index), - nir_ssa_for_src(b, intrin->src[0], 1)); - } - - assert(intrin->dest.is_ssa); - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index)); - nir_instr_remove(&intrin->instr); -} - -static void -lower_tex_deref(nir_tex_instr *tex, nir_deref_var *deref, - unsigned *const_index, nir_tex_src_type src_type, - struct apply_pipeline_layout_state *state) -{ - if (deref->deref.child) { - assert(deref->deref.child->deref_type == nir_deref_type_array); - nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child); - - *const_index += deref_array->base_offset; - - if (deref_array->deref_array_type == nir_deref_array_type_indirect) { - nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src, - tex->num_srcs + 1); - - for (unsigned i = 0; i < tex->num_srcs; i++) { - new_srcs[i].src_type = tex->src[i].src_type; - nir_instr_move_src(&tex->instr, &new_srcs[i].src, &tex->src[i].src); - } - - ralloc_free(tex->src); - 
tex->src = new_srcs; - - /* Now we can go ahead and move the source over to being a - * first-class texture source. - */ - tex->src[tex->num_srcs].src_type = src_type; - tex->num_srcs++; - assert(deref_array->indirect.is_ssa); - nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs - 1].src, - deref_array->indirect); - } - } -} - -static void -cleanup_tex_deref(nir_tex_instr *tex, nir_deref_var *deref) -{ - if (deref->deref.child == NULL) - return; - - nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child); - - if (deref_array->deref_array_type != nir_deref_array_type_indirect) - return; - - nir_instr_rewrite_src(&tex->instr, &deref_array->indirect, NIR_SRC_INIT); -} - -static void -lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) -{ - /* No one should have come by and lowered it already */ - assert(tex->texture); - - unsigned set = tex->texture->var->data.descriptor_set; - unsigned binding = tex->texture->var->data.binding; - tex->texture_index = state->set[set].surface_offsets[binding]; - lower_tex_deref(tex, tex->texture, &tex->texture_index, - nir_tex_src_texture_offset, state); - - if (tex->sampler) { - unsigned set = tex->sampler->var->data.descriptor_set; - unsigned binding = tex->sampler->var->data.binding; - tex->sampler_index = state->set[set].surface_offsets[binding]; - lower_tex_deref(tex, tex->sampler, &tex->sampler_index, - nir_tex_src_sampler_offset, state); - } - - /* The backend only ever uses this to mark used surfaces. We don't care - * about that little optimization so it just needs to be non-zero. - */ - tex->texture_array_size = 1; - - cleanup_tex_deref(tex, tex->texture); - if (tex->sampler) - cleanup_tex_deref(tex, tex->sampler); - tex->texture = NULL; - tex->sampler = NULL; -} - -static bool -apply_pipeline_layout_block(nir_block *block, void *void_state) -{ - struct apply_pipeline_layout_state *state = void_state; - - nir_foreach_instr_safe(block, instr) { - switch (instr->type) { - case nir_instr_type_intrinsic: { - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) { - lower_res_index_intrinsic(intrin, state); - } - break; - } - case nir_instr_type_tex: - lower_tex(nir_instr_as_tex(instr), state); - break; - default: - continue; - } - } - - return true; -} - -static void -setup_vec4_uniform_value(const union gl_constant_value **params, - const union gl_constant_value *values, - unsigned n) -{ - static const gl_constant_value zero = { 0 }; - - for (unsigned i = 0; i < n; ++i) - params[i] = &values[i]; - - for (unsigned i = n; i < 4; ++i) - params[i] = &zero; -} - -void -anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline, - nir_shader *shader, - struct brw_stage_prog_data *prog_data) -{ - struct anv_pipeline_layout *layout = pipeline->layout; - - struct apply_pipeline_layout_state state = { - .shader = shader, - }; - - void *mem_ctx = ralloc_context(NULL); - - for (unsigned s = 0; s < layout->num_sets; s++) { - const unsigned count = layout->set[s].layout->binding_count; - const unsigned words = BITSET_WORDS(count); - state.set[s].used = rzalloc_array(mem_ctx, BITSET_WORD, words); - state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count); - state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count); - state.set[s].image_offsets = rzalloc_array(mem_ctx, uint8_t, count); - } - - nir_foreach_function(shader, function) { - if (function->impl) - nir_foreach_block(function->impl, get_used_bindings_block, &state); - } - - 
struct anv_pipeline_bind_map map = { - .surface_count = 0, - .sampler_count = 0, - }; - - for (uint32_t set = 0; set < layout->num_sets; set++) { - struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; - - BITSET_WORD b, _tmp; - BITSET_FOREACH_SET(b, _tmp, state.set[set].used, - set_layout->binding_count) { - if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) - map.surface_count += set_layout->binding[b].array_size; - if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) - map.sampler_count += set_layout->binding[b].array_size; - if (set_layout->binding[b].stage[shader->stage].image_index >= 0) - map.image_count += set_layout->binding[b].array_size; - } - } - - map.surface_to_descriptor = - malloc(map.surface_count * sizeof(struct anv_pipeline_binding)); - map.sampler_to_descriptor = - malloc(map.sampler_count * sizeof(struct anv_pipeline_binding)); - - pipeline->bindings[shader->stage] = map; - - unsigned surface = 0; - unsigned sampler = 0; - unsigned image = 0; - for (uint32_t set = 0; set < layout->num_sets; set++) { - struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; - - BITSET_WORD b, _tmp; - BITSET_FOREACH_SET(b, _tmp, state.set[set].used, - set_layout->binding_count) { - unsigned array_size = set_layout->binding[b].array_size; - unsigned set_offset = set_layout->binding[b].descriptor_index; - - if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) { - state.set[set].surface_offsets[b] = surface; - for (unsigned i = 0; i < array_size; i++) { - map.surface_to_descriptor[surface + i].set = set; - map.surface_to_descriptor[surface + i].offset = set_offset + i; - } - surface += array_size; - } - - if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) { - state.set[set].sampler_offsets[b] = sampler; - for (unsigned i = 0; i < array_size; i++) { - map.sampler_to_descriptor[sampler + i].set = set; - map.sampler_to_descriptor[sampler + i].offset = set_offset + i; - } - sampler += array_size; - } - - if (set_layout->binding[b].stage[shader->stage].image_index >= 0) { - state.set[set].image_offsets[b] = image; - image += array_size; - } - } - } - - nir_foreach_function(shader, function) { - if (function->impl) { - nir_builder_init(&state.builder, function->impl); - nir_foreach_block(function->impl, apply_pipeline_layout_block, &state); - nir_metadata_preserve(function->impl, nir_metadata_block_index | - nir_metadata_dominance); - } - } - - if (map.image_count > 0) { - nir_foreach_variable(var, &shader->uniforms) { - if (glsl_type_is_image(var->type) || - (glsl_type_is_array(var->type) && - glsl_type_is_image(glsl_get_array_element(var->type)))) { - /* Images are represented as uniform push constants and the actual - * information required for reading/writing to/from the image is - * storred in the uniform. 
- */ - unsigned set = var->data.descriptor_set; - unsigned binding = var->data.binding; - unsigned image_index = state.set[set].image_offsets[binding]; - - var->data.driver_location = shader->num_uniforms + - image_index * BRW_IMAGE_PARAM_SIZE * 4; - } - } - - struct anv_push_constants *null_data = NULL; - const gl_constant_value **param = prog_data->param + shader->num_uniforms; - const struct brw_image_param *image_param = null_data->images; - for (uint32_t i = 0; i < map.image_count; i++) { - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, - (const union gl_constant_value *)&image_param->surface_idx, 1); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, - (const union gl_constant_value *)image_param->offset, 2); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET, - (const union gl_constant_value *)image_param->size, 3); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET, - (const union gl_constant_value *)image_param->stride, 4); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET, - (const union gl_constant_value *)image_param->tiling, 3); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, - (const union gl_constant_value *)image_param->swizzling, 2); - - param += BRW_IMAGE_PARAM_SIZE; - image_param ++; - } - - shader->num_uniforms += map.image_count * BRW_IMAGE_PARAM_SIZE * 4; - } -} diff --git a/src/vulkan/anv_nir_lower_push_constants.c b/src/vulkan/anv_nir_lower_push_constants.c deleted file mode 100644 index 53cd3d7..0000000 --- a/src/vulkan/anv_nir_lower_push_constants.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "anv_nir.h" - -struct lower_push_constants_state { - nir_shader *shader; - bool is_scalar; -}; - -static bool -lower_push_constants_block(nir_block *block, void *void_state) -{ - struct lower_push_constants_state *state = void_state; - - nir_foreach_instr(block, instr) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - - /* TODO: Handle indirect push constants */ - if (intrin->intrinsic != nir_intrinsic_load_push_constant) - continue; - - /* This wont work for vec4 stages. 
*/ - assert(state->is_scalar); - - assert(intrin->const_index[0] % 4 == 0); - assert(intrin->const_index[1] == 128); - - /* We just turn them into uniform loads with the appropreate offset */ - intrin->intrinsic = nir_intrinsic_load_uniform; - } - - return true; -} - -void -anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar) -{ - struct lower_push_constants_state state = { - .shader = shader, - .is_scalar = is_scalar, - }; - - nir_foreach_function(shader, function) { - if (function->impl) - nir_foreach_block(function->impl, lower_push_constants_block, &state); - } - - assert(shader->num_uniforms % 4 == 0); - if (is_scalar) - shader->num_uniforms /= 4; - else - shader->num_uniforms = DIV_ROUND_UP(shader->num_uniforms, 16); -} diff --git a/src/vulkan/anv_pass.c b/src/vulkan/anv_pass.c deleted file mode 100644 index d07e9fe..0000000 --- a/src/vulkan/anv_pass.c +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "anv_private.h" - -VkResult anv_CreateRenderPass( - VkDevice _device, - const VkRenderPassCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkRenderPass* pRenderPass) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_render_pass *pass; - size_t size; - size_t attachments_offset; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); - - size = sizeof(*pass); - size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); - attachments_offset = size; - size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]); - - pass = anv_alloc2(&device->alloc, pAllocator, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pass == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - /* Clear the subpasses along with the parent pass. This required because - * each array member of anv_subpass must be a valid pointer if not NULL. 
- */ - memset(pass, 0, size); - pass->attachment_count = pCreateInfo->attachmentCount; - pass->subpass_count = pCreateInfo->subpassCount; - pass->attachments = (void *) pass + attachments_offset; - - for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { - struct anv_render_pass_attachment *att = &pass->attachments[i]; - - att->format = anv_format_for_vk_format(pCreateInfo->pAttachments[i].format); - att->samples = pCreateInfo->pAttachments[i].samples; - att->load_op = pCreateInfo->pAttachments[i].loadOp; - att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; - // att->store_op = pCreateInfo->pAttachments[i].storeOp; - // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; - } - - uint32_t subpass_attachment_count = 0, *p; - for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { - const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; - - subpass_attachment_count += - desc->inputAttachmentCount + - desc->colorAttachmentCount + - /* Count colorAttachmentCount again for resolve_attachments */ - desc->colorAttachmentCount; - } - - pass->subpass_attachments = - anv_alloc2(&device->alloc, pAllocator, - subpass_attachment_count * sizeof(uint32_t), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pass->subpass_attachments == NULL) { - anv_free2(&device->alloc, pAllocator, pass); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - p = pass->subpass_attachments; - for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { - const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; - struct anv_subpass *subpass = &pass->subpasses[i]; - - subpass->input_count = desc->inputAttachmentCount; - subpass->color_count = desc->colorAttachmentCount; - - if (desc->inputAttachmentCount > 0) { - subpass->input_attachments = p; - p += desc->inputAttachmentCount; - - for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { - subpass->input_attachments[j] - = desc->pInputAttachments[j].attachment; - } - } - - if (desc->colorAttachmentCount > 0) { - subpass->color_attachments = p; - p += desc->colorAttachmentCount; - - for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { - subpass->color_attachments[j] - = desc->pColorAttachments[j].attachment; - } - } - - subpass->has_resolve = false; - if (desc->pResolveAttachments) { - subpass->resolve_attachments = p; - p += desc->colorAttachmentCount; - - for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { - uint32_t a = desc->pResolveAttachments[j].attachment; - subpass->resolve_attachments[j] = a; - if (a != VK_ATTACHMENT_UNUSED) - subpass->has_resolve = true; - } - } - - if (desc->pDepthStencilAttachment) { - subpass->depth_stencil_attachment = - desc->pDepthStencilAttachment->attachment; - } else { - subpass->depth_stencil_attachment = VK_ATTACHMENT_UNUSED; - } - } - - *pRenderPass = anv_render_pass_to_handle(pass); - - return VK_SUCCESS; -} - -void anv_DestroyRenderPass( - VkDevice _device, - VkRenderPass _pass, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_render_pass, pass, _pass); - - anv_free2(&device->alloc, pAllocator, pass->subpass_attachments); - anv_free2(&device->alloc, pAllocator, pass); -} - -void anv_GetRenderAreaGranularity( - VkDevice device, - VkRenderPass renderPass, - VkExtent2D* pGranularity) -{ - *pGranularity = (VkExtent2D) { 1, 1 }; -} diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c deleted file mode 100644 index a7feefb..0000000 --- a/src/vulkan/anv_pipeline.c +++ /dev/null @@ -1,1278 
+0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "util/mesa-sha1.h" -#include "anv_private.h" -#include "brw_nir.h" -#include "anv_nir.h" -#include "nir/spirv/nir_spirv.h" - -/* Needed for SWIZZLE macros */ -#include "program/prog_instruction.h" - -// Shader functions - -VkResult anv_CreateShaderModule( - VkDevice _device, - const VkShaderModuleCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkShaderModule* pShaderModule) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_shader_module *module; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO); - assert(pCreateInfo->flags == 0); - - module = anv_alloc2(&device->alloc, pAllocator, - sizeof(*module) + pCreateInfo->codeSize, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (module == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - module->nir = NULL; - module->size = pCreateInfo->codeSize; - memcpy(module->data, pCreateInfo->pCode, module->size); - - _mesa_sha1_compute(module->data, module->size, module->sha1); - - *pShaderModule = anv_shader_module_to_handle(module); - - return VK_SUCCESS; -} - -void anv_DestroyShaderModule( - VkDevice _device, - VkShaderModule _module, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_shader_module, module, _module); - - anv_free2(&device->alloc, pAllocator, module); -} - -#define SPIR_V_MAGIC_NUMBER 0x07230203 - -/* Eventually, this will become part of anv_CreateShader. Unfortunately, - * we can't do that yet because we don't have the ability to copy nir. - */ -static nir_shader * -anv_shader_compile_to_nir(struct anv_device *device, - struct anv_shader_module *module, - const char *entrypoint_name, - gl_shader_stage stage, - const VkSpecializationInfo *spec_info) -{ - if (strcmp(entrypoint_name, "main") != 0) { - anv_finishme("Multiple shaders per module not really supported"); - } - - const struct brw_compiler *compiler = - device->instance->physicalDevice.compiler; - const nir_shader_compiler_options *nir_options = - compiler->glsl_compiler_options[stage].NirOptions; - - nir_shader *nir; - nir_function *entry_point; - if (module->nir) { - /* Some things such as our meta clear/blit code will give us a NIR - * shader directly. 
In that case, we just ignore the SPIR-V entirely - * and just use the NIR shader */ - nir = module->nir; - nir->options = nir_options; - nir_validate_shader(nir); - - assert(exec_list_length(&nir->functions) == 1); - struct exec_node *node = exec_list_get_head(&nir->functions); - entry_point = exec_node_data(nir_function, node, node); - } else { - uint32_t *spirv = (uint32_t *) module->data; - assert(spirv[0] == SPIR_V_MAGIC_NUMBER); - assert(module->size % 4 == 0); - - uint32_t num_spec_entries = 0; - struct nir_spirv_specialization *spec_entries = NULL; - if (spec_info && spec_info->mapEntryCount > 0) { - num_spec_entries = spec_info->mapEntryCount; - spec_entries = malloc(num_spec_entries * sizeof(*spec_entries)); - for (uint32_t i = 0; i < num_spec_entries; i++) { - const uint32_t *data = - spec_info->pData + spec_info->pMapEntries[i].offset; - assert((const void *)(data + 1) <= - spec_info->pData + spec_info->dataSize); - - spec_entries[i].id = spec_info->pMapEntries[i].constantID; - spec_entries[i].data = *data; - } - } - - entry_point = spirv_to_nir(spirv, module->size / 4, - spec_entries, num_spec_entries, - stage, entrypoint_name, nir_options); - nir = entry_point->shader; - assert(nir->stage == stage); - nir_validate_shader(nir); - - free(spec_entries); - - nir_lower_returns(nir); - nir_validate_shader(nir); - - nir_inline_functions(nir); - nir_validate_shader(nir); - - /* Pick off the single entrypoint that we want */ - foreach_list_typed_safe(nir_function, func, node, &nir->functions) { - if (func != entry_point) - exec_node_remove(&func->node); - } - assert(exec_list_length(&nir->functions) == 1); - entry_point->name = ralloc_strdup(entry_point, "main"); - - nir_remove_dead_variables(nir, nir_var_shader_in); - nir_remove_dead_variables(nir, nir_var_shader_out); - nir_remove_dead_variables(nir, nir_var_system_value); - nir_validate_shader(nir); - - nir_lower_outputs_to_temporaries(entry_point->shader, entry_point); - - nir_lower_system_values(nir); - nir_validate_shader(nir); - } - - /* Vulkan uses the separate-shader linking model */ - nir->info.separate_shader = true; - - nir = brw_preprocess_nir(nir, compiler->scalar_stage[stage]); - - nir_shader_gather_info(nir, entry_point->impl); - - uint32_t indirect_mask = 0; - if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput) - indirect_mask |= (1 << nir_var_shader_in); - if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp) - indirect_mask |= 1 << nir_var_local; - - nir_lower_indirect_derefs(nir, indirect_mask); - - return nir; -} - -void anv_DestroyPipeline( - VkDevice _device, - VkPipeline _pipeline, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); - - anv_reloc_list_finish(&pipeline->batch_relocs, - pAllocator ? 
pAllocator : &device->alloc); - if (pipeline->blend_state.map) - anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); - anv_free2(&device->alloc, pAllocator, pipeline); -} - -static const uint32_t vk_to_gen_primitive_type[] = { - [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, - [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, - [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, - [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ, - [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ, - [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, -/* [VK_PRIMITIVE_TOPOLOGY_PATCH_LIST] = _3DPRIM_PATCHLIST_1 */ -}; - -static void -populate_sampler_prog_key(const struct brw_device_info *devinfo, - struct brw_sampler_prog_key_data *key) -{ - /* XXX: Handle texture swizzle on HSW- */ - for (int i = 0; i < MAX_SAMPLERS; i++) { - /* Assume color sampler, no swizzling. (Works for BDW+) */ - key->swizzles[i] = SWIZZLE_XYZW; - } -} - -static void -populate_vs_prog_key(const struct brw_device_info *devinfo, - struct brw_vs_prog_key *key) -{ - memset(key, 0, sizeof(*key)); - - populate_sampler_prog_key(devinfo, &key->tex); - - /* XXX: Handle vertex input work-arounds */ - - /* XXX: Handle sampler_prog_key */ -} - -static void -populate_gs_prog_key(const struct brw_device_info *devinfo, - struct brw_gs_prog_key *key) -{ - memset(key, 0, sizeof(*key)); - - populate_sampler_prog_key(devinfo, &key->tex); -} - -static void -populate_wm_prog_key(const struct brw_device_info *devinfo, - const VkGraphicsPipelineCreateInfo *info, - const struct anv_graphics_pipeline_create_info *extra, - struct brw_wm_prog_key *key) -{ - ANV_FROM_HANDLE(anv_render_pass, render_pass, info->renderPass); - - memset(key, 0, sizeof(*key)); - - populate_sampler_prog_key(devinfo, &key->tex); - - /* TODO: Fill out key->input_slots_valid */ - - /* Vulkan doesn't specify a default */ - key->high_quality_derivatives = false; - - /* XXX Vulkan doesn't appear to specify */ - key->clamp_fragment_color = false; - - /* Vulkan always specifies upper-left coordinates */ - key->drawable_height = 0; - key->render_to_fbo = false; - - if (extra && extra->color_attachment_count >= 0) { - key->nr_color_regions = extra->color_attachment_count; - } else { - key->nr_color_regions = - render_pass->subpasses[info->subpass].color_count; - } - - key->replicate_alpha = key->nr_color_regions > 1 && - info->pMultisampleState && - info->pMultisampleState->alphaToCoverageEnable; - - if (info->pMultisampleState && info->pMultisampleState->rasterizationSamples > 1) { - /* We should probably pull this out of the shader, but it's fairly - * harmless to compute it and then let dead-code take care of it. 
- */ - key->persample_shading = info->pMultisampleState->sampleShadingEnable; - if (key->persample_shading) - key->persample_2x = info->pMultisampleState->rasterizationSamples == 2; - - key->compute_pos_offset = info->pMultisampleState->sampleShadingEnable; - key->compute_sample_id = info->pMultisampleState->sampleShadingEnable; - } -} - -static void -populate_cs_prog_key(const struct brw_device_info *devinfo, - struct brw_cs_prog_key *key) -{ - memset(key, 0, sizeof(*key)); - - populate_sampler_prog_key(devinfo, &key->tex); -} - -static nir_shader * -anv_pipeline_compile(struct anv_pipeline *pipeline, - struct anv_shader_module *module, - const char *entrypoint, - gl_shader_stage stage, - const VkSpecializationInfo *spec_info, - struct brw_stage_prog_data *prog_data) -{ - const struct brw_compiler *compiler = - pipeline->device->instance->physicalDevice.compiler; - - nir_shader *nir = anv_shader_compile_to_nir(pipeline->device, - module, entrypoint, stage, - spec_info); - if (nir == NULL) - return NULL; - - anv_nir_lower_push_constants(nir, compiler->scalar_stage[stage]); - - /* Figure out the number of parameters */ - prog_data->nr_params = 0; - - if (nir->num_uniforms > 0) { - /* If the shader uses any push constants at all, we'll just give - * them the maximum possible number - */ - prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float); - } - - if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets) - prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2; - - if (pipeline->bindings[stage].image_count > 0) - prog_data->nr_params += pipeline->bindings[stage].image_count * - BRW_IMAGE_PARAM_SIZE; - - if (prog_data->nr_params > 0) { - /* XXX: I think we're leaking this */ - prog_data->param = (const union gl_constant_value **) - malloc(prog_data->nr_params * sizeof(union gl_constant_value *)); - - /* We now set the param values to be offsets into a - * anv_push_constant_data structure. Since the compiler doesn't - * actually dereference any of the gl_constant_value pointers in the - * params array, it doesn't really matter what we put here. - */ - struct anv_push_constants *null_data = NULL; - if (nir->num_uniforms > 0) { - /* Fill out the push constants section of the param array */ - for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++) - prog_data->param[i] = (const union gl_constant_value *) - &null_data->client_data[i * sizeof(float)]; - } - } - - /* Set up dynamic offsets */ - anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data); - - char surface_usage_mask[256], sampler_usage_mask[256]; - zero(surface_usage_mask); - zero(sampler_usage_mask); - - /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ - if (pipeline->layout) - anv_nir_apply_pipeline_layout(pipeline, nir, prog_data); - - /* All binding table offsets provided by apply_pipeline_layout() are - * relative to the start of the bindint table (plus MAX_RTS for VS). 
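[Aside on the param setup above] The param array filled in above is never dereferenced by the compiler; each entry is really a byte offset into struct anv_push_constants smuggled through a pointer taken off a NULL base. A standalone sketch of that idiom follows; example_push_constants is a hypothetical stand-in for the real struct, and the values printed are only illustrative.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct example_push_constants {            /* hypothetical stand-in for anv_push_constants */
   uint8_t  client_data[128];              /* push constant bytes */
   uint32_t dynamic_offsets[16];
};

int main(void)
{
   /* A NULL base turns "&base->field[i]" into a plain byte offset, the same
    * trick hand-rolled offsetof() implementations use; nothing is dereferenced. */
   struct example_push_constants *null_data = NULL;

   for (unsigned i = 0; i < 4; i++) {
      const void *param = &null_data->client_data[i * sizeof(float)];
      printf("param[%u] encodes offset %zu (offsetof agrees: %zu)\n", i,
             (size_t)(uintptr_t)param,
             offsetof(struct example_push_constants, client_data) + i * sizeof(float));
   }
   return 0;
}

Strictly speaking this lives in the same undefined-behaviour grey area as a hand-rolled offsetof(), which is why the code above can note that it doesn't really matter what the pointers actually are.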
- */ - unsigned bias; - switch (stage) { - case MESA_SHADER_FRAGMENT: - bias = MAX_RTS; - break; - case MESA_SHADER_COMPUTE: - bias = 1; - break; - default: - bias = 0; - break; - } - prog_data->binding_table.size_bytes = 0; - prog_data->binding_table.texture_start = bias; - prog_data->binding_table.ubo_start = bias; - prog_data->binding_table.ssbo_start = bias; - prog_data->binding_table.image_start = bias; - - /* Finish the optimization and compilation process */ - if (nir->stage != MESA_SHADER_VERTEX && - nir->stage != MESA_SHADER_TESS_CTRL && - nir->stage != MESA_SHADER_TESS_EVAL && - nir->stage != MESA_SHADER_FRAGMENT) { - nir = brw_nir_lower_io(nir, &pipeline->device->info, - compiler->scalar_stage[stage], false, NULL); - } - - /* nir_lower_io will only handle the push constants; we need to set this - * to the full number of possible uniforms. - */ - nir->num_uniforms = prog_data->nr_params * 4; - - return nir; -} - -static void -anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, - gl_shader_stage stage, - struct brw_stage_prog_data *prog_data) -{ - struct brw_device_info *devinfo = &pipeline->device->info; - uint32_t max_threads[] = { - [MESA_SHADER_VERTEX] = devinfo->max_vs_threads, - [MESA_SHADER_TESS_CTRL] = 0, - [MESA_SHADER_TESS_EVAL] = 0, - [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads, - [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads, - [MESA_SHADER_COMPUTE] = devinfo->max_cs_threads, - }; - - pipeline->prog_data[stage] = prog_data; - pipeline->active_stages |= mesa_to_vk_shader_stage(stage); - pipeline->scratch_start[stage] = pipeline->total_scratch; - pipeline->total_scratch = - align_u32(pipeline->total_scratch, 1024) + - prog_data->total_scratch * max_threads[stage]; -} - -static VkResult -anv_pipeline_compile_vs(struct anv_pipeline *pipeline, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *info, - struct anv_shader_module *module, - const char *entrypoint, - const VkSpecializationInfo *spec_info) -{ - const struct brw_compiler *compiler = - pipeline->device->instance->physicalDevice.compiler; - struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; - struct brw_vs_prog_key key; - uint32_t kernel; - unsigned char sha1[20], *hash; - - populate_vs_prog_key(&pipeline->device->info, &key); - - if (module->size > 0) { - hash = sha1; - anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, prog_data); - } else { - hash = NULL; - } - - if (module->size == 0 || kernel == NO_KERNEL) { - memset(prog_data, 0, sizeof(*prog_data)); - - nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, - MESA_SHADER_VERTEX, spec_info, - &prog_data->base.base); - if (nir == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - void *mem_ctx = ralloc_context(NULL); - - if (module->nir == NULL) - ralloc_steal(mem_ctx, nir); - - prog_data->inputs_read = nir->info.inputs_read; - if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) - pipeline->writes_point_size = true; - - brw_compute_vue_map(&pipeline->device->info, - &prog_data->base.vue_map, - nir->info.outputs_written, - nir->info.separate_shader); - - unsigned code_size; - const unsigned *shader_code = - brw_compile_vs(compiler, NULL, mem_ctx, &key, prog_data, nir, - NULL, false, -1, &code_size, NULL); - if (shader_code == NULL) { - ralloc_free(mem_ctx); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - kernel = anv_pipeline_cache_upload_kernel(cache, hash, - shader_code, code_size, - prog_data, 
sizeof(*prog_data)); - ralloc_free(mem_ctx); - } - - if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { - pipeline->vs_simd8 = kernel; - pipeline->vs_vec4 = NO_KERNEL; - } else { - pipeline->vs_simd8 = NO_KERNEL; - pipeline->vs_vec4 = kernel; - } - - anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, - &prog_data->base.base); - - return VK_SUCCESS; -} - -static VkResult -anv_pipeline_compile_gs(struct anv_pipeline *pipeline, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *info, - struct anv_shader_module *module, - const char *entrypoint, - const VkSpecializationInfo *spec_info) -{ - const struct brw_compiler *compiler = - pipeline->device->instance->physicalDevice.compiler; - struct brw_gs_prog_data *prog_data = &pipeline->gs_prog_data; - struct brw_gs_prog_key key; - uint32_t kernel; - unsigned char sha1[20], *hash; - - populate_gs_prog_key(&pipeline->device->info, &key); - - if (module->size > 0) { - hash = sha1; - anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, prog_data); - } else { - hash = NULL; - } - - if (module->size == 0 || kernel == NO_KERNEL) { - memset(prog_data, 0, sizeof(*prog_data)); - - nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, - MESA_SHADER_GEOMETRY, spec_info, - &prog_data->base.base); - if (nir == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - void *mem_ctx = ralloc_context(NULL); - - if (module->nir == NULL) - ralloc_steal(mem_ctx, nir); - - if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) - pipeline->writes_point_size = true; - - brw_compute_vue_map(&pipeline->device->info, - &prog_data->base.vue_map, - nir->info.outputs_written, - nir->info.separate_shader); - - unsigned code_size; - const unsigned *shader_code = - brw_compile_gs(compiler, NULL, mem_ctx, &key, prog_data, nir, - NULL, -1, &code_size, NULL); - if (shader_code == NULL) { - ralloc_free(mem_ctx); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - /* TODO: SIMD8 GS */ - kernel = anv_pipeline_cache_upload_kernel(cache, hash, - shader_code, code_size, - prog_data, sizeof(*prog_data)); - - ralloc_free(mem_ctx); - } - - pipeline->gs_kernel = kernel; - - anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, - &prog_data->base.base); - - return VK_SUCCESS; -} - -static VkResult -anv_pipeline_compile_fs(struct anv_pipeline *pipeline, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *info, - const struct anv_graphics_pipeline_create_info *extra, - struct anv_shader_module *module, - const char *entrypoint, - const VkSpecializationInfo *spec_info) -{ - const struct brw_compiler *compiler = - pipeline->device->instance->physicalDevice.compiler; - struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; - struct brw_wm_prog_key key; - uint32_t kernel; - unsigned char sha1[20], *hash; - - populate_wm_prog_key(&pipeline->device->info, info, extra, &key); - - if (pipeline->use_repclear) - key.nr_color_regions = 1; - - if (module->size > 0) { - hash = sha1; - anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, prog_data); - } else { - hash = NULL; - } - - if (module->size == 0 || kernel == NO_KERNEL) { - memset(prog_data, 0, sizeof(*prog_data)); - - prog_data->binding_table.render_target_start = 0; - - nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, - MESA_SHADER_FRAGMENT, spec_info, - &prog_data->base); - if (nir == 
NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - nir_function_impl *impl = nir_shader_get_entrypoint(nir)->impl; - nir_foreach_variable_safe(var, &nir->outputs) { - if (var->data.location < FRAG_RESULT_DATA0) - continue; - - unsigned rt = var->data.location - FRAG_RESULT_DATA0; - if (rt >= key.nr_color_regions) { - var->data.mode = nir_var_local; - exec_node_remove(&var->node); - exec_list_push_tail(&impl->locals, &var->node); - } - } - - void *mem_ctx = ralloc_context(NULL); - - if (module->nir == NULL) - ralloc_steal(mem_ctx, nir); - - unsigned code_size; - const unsigned *shader_code = - brw_compile_fs(compiler, NULL, mem_ctx, &key, prog_data, nir, - NULL, -1, -1, pipeline->use_repclear, &code_size, NULL); - if (shader_code == NULL) { - ralloc_free(mem_ctx); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - kernel = anv_pipeline_cache_upload_kernel(cache, hash, - shader_code, code_size, - prog_data, sizeof(*prog_data)); - - ralloc_free(mem_ctx); - } - - if (prog_data->no_8) - pipeline->ps_simd8 = NO_KERNEL; - else - pipeline->ps_simd8 = kernel; - - if (prog_data->no_8 || prog_data->prog_offset_16) { - pipeline->ps_simd16 = kernel + prog_data->prog_offset_16; - } else { - pipeline->ps_simd16 = NO_KERNEL; - } - - pipeline->ps_ksp2 = 0; - pipeline->ps_grf_start2 = 0; - if (pipeline->ps_simd8 != NO_KERNEL) { - pipeline->ps_ksp0 = pipeline->ps_simd8; - pipeline->ps_grf_start0 = prog_data->base.dispatch_grf_start_reg; - if (pipeline->ps_simd16 != NO_KERNEL) { - pipeline->ps_ksp2 = pipeline->ps_simd16; - pipeline->ps_grf_start2 = prog_data->dispatch_grf_start_reg_16; - } - } else if (pipeline->ps_simd16 != NO_KERNEL) { - pipeline->ps_ksp0 = pipeline->ps_simd16; - pipeline->ps_grf_start0 = prog_data->dispatch_grf_start_reg_16; - } - - anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, - &prog_data->base); - - return VK_SUCCESS; -} - -VkResult -anv_pipeline_compile_cs(struct anv_pipeline *pipeline, - struct anv_pipeline_cache *cache, - const VkComputePipelineCreateInfo *info, - struct anv_shader_module *module, - const char *entrypoint, - const VkSpecializationInfo *spec_info) -{ - const struct brw_compiler *compiler = - pipeline->device->instance->physicalDevice.compiler; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - struct brw_cs_prog_key key; - uint32_t kernel; - unsigned char sha1[20], *hash; - - populate_cs_prog_key(&pipeline->device->info, &key); - - if (module->size > 0) { - hash = sha1; - anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, prog_data); - } else { - hash = NULL; - } - - if (module->size == 0 || kernel == NO_KERNEL) { - memset(prog_data, 0, sizeof(*prog_data)); - - prog_data->binding_table.work_groups_start = 0; - - nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, - MESA_SHADER_COMPUTE, spec_info, - &prog_data->base); - if (nir == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - prog_data->base.total_shared = nir->num_shared; - - void *mem_ctx = ralloc_context(NULL); - - if (module->nir == NULL) - ralloc_steal(mem_ctx, nir); - - unsigned code_size; - const unsigned *shader_code = - brw_compile_cs(compiler, NULL, mem_ctx, &key, prog_data, nir, - -1, &code_size, NULL); - if (shader_code == NULL) { - ralloc_free(mem_ctx); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - kernel = anv_pipeline_cache_upload_kernel(cache, hash, - shader_code, code_size, - prog_data, sizeof(*prog_data)); - ralloc_free(mem_ctx); - } - - 
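[Aside on the compile paths above] The VS, GS, FS and CS paths above all follow the same lookup-or-compile flow: hash the prog key, module SHA-1, entrypoint and specialization info; probe the cache; compile with brw_compile_*() and upload only on a miss (meta shaders have no SPIR-V to hash, so they always miss). A toy, self-contained version of that flow; the toy_* names and flat-array "cache" are made up and only mirror the control flow, not the real hash table.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NO_KERNEL 1u                        /* same sentinel as the driver: 1 is never a valid offset */

struct toy_entry { unsigned char sha1[20]; uint32_t offset; };
static struct toy_entry toy_cache[64];
static unsigned toy_count;
static uint32_t next_offset = 64;

static uint32_t toy_search(const unsigned char sha1[20])
{
   for (unsigned i = 0; i < toy_count; i++)
      if (memcmp(toy_cache[i].sha1, sha1, 20) == 0)
         return toy_cache[i].offset;
   return NO_KERNEL;
}

static uint32_t toy_upload(const unsigned char sha1[20], uint32_t kernel_size)
{
   uint32_t offset = next_offset;
   next_offset += kernel_size;
   memcpy(toy_cache[toy_count].sha1, sha1, 20);
   toy_cache[toy_count++].offset = offset;
   return offset;
}

int main(void)
{
   unsigned char sha1[20] = { 0xab, 0xcd }; /* pretend anv_hash_shader() output */

   uint32_t kernel = toy_search(sha1);
   if (kernel == NO_KERNEL)
      kernel = toy_upload(sha1, 256);       /* "compile" and upload on a miss */

   printf("first lookup: offset %u (was a miss)\n", kernel);
   printf("second lookup: offset %u (cache hit)\n", toy_search(sha1));
   return 0;
}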
pipeline->cs_simd = kernel; - - anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, - &prog_data->base); - - return VK_SUCCESS; -} - -static const int gen8_push_size = 32 * 1024; - -static void -gen7_compute_urb_partition(struct anv_pipeline *pipeline) -{ - const struct brw_device_info *devinfo = &pipeline->device->info; - bool vs_present = pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT; - unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1; - unsigned vs_entry_size_bytes = vs_size * 64; - bool gs_present = pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT; - unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1; - unsigned gs_entry_size_bytes = gs_size * 64; - - /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): - * - * VS Number of URB Entries must be divisible by 8 if the VS URB Entry - * Allocation Size is less than 9 512-bit URB entries. - * - * Similar text exists for GS. - */ - unsigned vs_granularity = (vs_size < 9) ? 8 : 1; - unsigned gs_granularity = (gs_size < 9) ? 8 : 1; - - /* URB allocations must be done in 8k chunks. */ - unsigned chunk_size_bytes = 8192; - - /* Determine the size of the URB in chunks. */ - unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes; - - /* Reserve space for push constants */ - unsigned push_constant_bytes = gen8_push_size; - unsigned push_constant_chunks = - push_constant_bytes / chunk_size_bytes; - - /* Initially, assign each stage the minimum amount of URB space it needs, - * and make a note of how much additional space it "wants" (the amount of - * additional space it could actually make use of). - */ - - /* VS has a lower limit on the number of URB entries */ - unsigned vs_chunks = - ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes, - chunk_size_bytes) / chunk_size_bytes; - unsigned vs_wants = - ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes, - chunk_size_bytes) / chunk_size_bytes - vs_chunks; - - unsigned gs_chunks = 0; - unsigned gs_wants = 0; - if (gs_present) { - /* There are two constraints on the minimum amount of URB space we can - * allocate: - * - * (1) We need room for at least 2 URB entries, since we always operate - * the GS in DUAL_OBJECT mode. - * - * (2) We can't allocate less than nr_gs_entries_granularity. - */ - gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes, - chunk_size_bytes) / chunk_size_bytes; - gs_wants = - ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes, - chunk_size_bytes) / chunk_size_bytes - gs_chunks; - } - - /* There should always be enough URB space to satisfy the minimum - * requirements of each stage. - */ - unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks; - assert(total_needs <= urb_chunks); - - /* Mete out remaining space (if any) in proportion to "wants". */ - unsigned total_wants = vs_wants + gs_wants; - unsigned remaining_space = urb_chunks - total_needs; - if (remaining_space > total_wants) - remaining_space = total_wants; - if (remaining_space > 0) { - unsigned vs_additional = (unsigned) - round(vs_wants * (((double) remaining_space) / total_wants)); - vs_chunks += vs_additional; - remaining_space -= vs_additional; - gs_chunks += remaining_space; - } - - /* Sanity check that we haven't over-allocated. */ - assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks); - - /* Finally, compute the number of entries that can fit in the space - * allocated to each stage. 
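[Aside on the URB partitioning above] The partitioning above (continued just below) works in 8 KB chunks: reserve the push-constant space, give VS and GS their minimum chunk counts, then hand out whatever is left in proportion to how much more each stage could actually use. A standalone rerun of that arithmetic with made-up but plausible numbers (256 KB URB, 32 KB push constants, 64 B VS entries, 128 B GS entries); none of these values come from a real gen.

#include <math.h>
#include <stdio.h>

#define ALIGN(v, a) (((v) + (a) - 1) / (a) * (a))

int main(void)
{
   const unsigned chunk = 8192;
   const unsigned urb_chunks  = 256 * 1024 / chunk;                  /* 32 */
   const unsigned push_chunks =  32 * 1024 / chunk;                  /*  4 */

   unsigned vs_chunks = ALIGN(32 * 64, chunk) / chunk;               /* assumed 32-entry minimum -> 1 */
   unsigned vs_wants  = ALIGN(640 * 64, chunk) / chunk - vs_chunks;  /* 5 - 1 = 4 */
   unsigned gs_chunks = ALIGN(2 * 128, chunk) / chunk;               /* DUAL_OBJECT minimum of 2 -> 1 */
   unsigned gs_wants  = ALIGN(256 * 128, chunk) / chunk - gs_chunks; /* 4 - 1 = 3 */

   unsigned remaining   = urb_chunks - (push_chunks + vs_chunks + gs_chunks);
   unsigned total_wants = vs_wants + gs_wants;
   if (remaining > total_wants)
      remaining = total_wants;

   /* Mete out the leftover chunks in proportion to "wants", as above. */
   unsigned vs_extra = (unsigned)round(vs_wants * ((double)remaining / total_wants));
   vs_chunks += vs_extra;
   gs_chunks += remaining - vs_extra;

   printf("push constants %u chunks, VS %u chunks, GS %u chunks\n",
          push_chunks, vs_chunks, gs_chunks);
   return 0;
}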
- */ - unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes; - unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes; - - /* Since we rounded up when computing *_wants, this may be slightly more - * than the maximum allowed amount, so correct for that. - */ - nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries); - nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries); - - /* Ensure that we program a multiple of the granularity. */ - nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity); - nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity); - - /* Finally, sanity check to make sure we have at least the minimum number - * of entries needed for each stage. - */ - assert(nr_vs_entries >= devinfo->urb.min_vs_entries); - if (gs_present) - assert(nr_gs_entries >= 2); - - /* Lay out the URB in the following order: - * - push constants - * - VS - * - GS - */ - pipeline->urb.vs_start = push_constant_chunks; - pipeline->urb.vs_size = vs_size; - pipeline->urb.nr_vs_entries = nr_vs_entries; - - pipeline->urb.gs_start = push_constant_chunks + vs_chunks; - pipeline->urb.gs_size = gs_size; - pipeline->urb.nr_gs_entries = nr_gs_entries; -} - -static void -anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline, - const VkGraphicsPipelineCreateInfo *pCreateInfo) -{ - anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL; - ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass); - struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; - - pipeline->dynamic_state = default_dynamic_state; - - if (pCreateInfo->pDynamicState) { - /* Remove all of the states that are marked as dynamic */ - uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount; - for (uint32_t s = 0; s < count; s++) - states &= ~(1 << pCreateInfo->pDynamicState->pDynamicStates[s]); - } - - struct anv_dynamic_state *dynamic = &pipeline->dynamic_state; - - dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount; - if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { - typed_memcpy(dynamic->viewport.viewports, - pCreateInfo->pViewportState->pViewports, - pCreateInfo->pViewportState->viewportCount); - } - - dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount; - if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) { - typed_memcpy(dynamic->scissor.scissors, - pCreateInfo->pViewportState->pScissors, - pCreateInfo->pViewportState->scissorCount); - } - - if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) { - assert(pCreateInfo->pRasterizationState); - dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth; - } - - if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) { - assert(pCreateInfo->pRasterizationState); - dynamic->depth_bias.bias = - pCreateInfo->pRasterizationState->depthBiasConstantFactor; - dynamic->depth_bias.clamp = - pCreateInfo->pRasterizationState->depthBiasClamp; - dynamic->depth_bias.slope = - pCreateInfo->pRasterizationState->depthBiasSlopeFactor; - } - - if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) { - assert(pCreateInfo->pColorBlendState); - typed_memcpy(dynamic->blend_constants, - pCreateInfo->pColorBlendState->blendConstants, 4); - } - - /* If there is no depthstencil attachment, then don't read - * pDepthStencilState. The Vulkan spec states that pDepthStencilState may - * be NULL in this case. Even if pDepthStencilState is non-NULL, there is - * no need to override the depthstencil defaults in - * anv_pipeline::dynamic_state when there is no depthstencil attachment. 
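[Aside on the dynamic-state masking above] Anything the application lists in pDynamicState is cleared from the baked copy above and must instead be set with vkCmdSetViewport()/vkCmdSetScissor() and friends at record time. A sketch of the application-side structure that makes viewport and scissor dynamic; this is standard Vulkan 1.0 API, and the surrounding VkGraphicsPipelineCreateInfo ("info") is assumed to be built elsewhere.

#include <vulkan/vulkan.h>

static const VkDynamicState dynamic_states[] = {
   VK_DYNAMIC_STATE_VIEWPORT,
   VK_DYNAMIC_STATE_SCISSOR,
};

static const VkPipelineDynamicStateCreateInfo dynamic_info = {
   .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
   .dynamicStateCount = 2,
   .pDynamicStates = dynamic_states,
};

/* ... later, while filling the create info: info.pDynamicState = &dynamic_info; */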
- * - * From the Vulkan spec (20 Oct 2015, git-aa308cb): - * - * pDepthStencilState [...] may only be NULL if renderPass and subpass - * specify a subpass that has no depth/stencil attachment. - */ - if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { - if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) { - assert(pCreateInfo->pDepthStencilState); - dynamic->depth_bounds.min = - pCreateInfo->pDepthStencilState->minDepthBounds; - dynamic->depth_bounds.max = - pCreateInfo->pDepthStencilState->maxDepthBounds; - } - - if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) { - assert(pCreateInfo->pDepthStencilState); - dynamic->stencil_compare_mask.front = - pCreateInfo->pDepthStencilState->front.compareMask; - dynamic->stencil_compare_mask.back = - pCreateInfo->pDepthStencilState->back.compareMask; - } - - if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) { - assert(pCreateInfo->pDepthStencilState); - dynamic->stencil_write_mask.front = - pCreateInfo->pDepthStencilState->front.writeMask; - dynamic->stencil_write_mask.back = - pCreateInfo->pDepthStencilState->back.writeMask; - } - - if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { - assert(pCreateInfo->pDepthStencilState); - dynamic->stencil_reference.front = - pCreateInfo->pDepthStencilState->front.reference; - dynamic->stencil_reference.back = - pCreateInfo->pDepthStencilState->back.reference; - } - } - - pipeline->dynamic_state_mask = states; -} - -static void -anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info) -{ - struct anv_render_pass *renderpass = NULL; - struct anv_subpass *subpass = NULL; - - /* Assert that all required members of VkGraphicsPipelineCreateInfo are - * present, as explained by the Vulkan (20 Oct 2015, git-aa308cb), Section - * 4.2 Graphics Pipeline. 
- */ - assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); - - renderpass = anv_render_pass_from_handle(info->renderPass); - assert(renderpass); - - if (renderpass != &anv_meta_dummy_renderpass) { - assert(info->subpass < renderpass->subpass_count); - subpass = &renderpass->subpasses[info->subpass]; - } - - assert(info->stageCount >= 1); - assert(info->pVertexInputState); - assert(info->pInputAssemblyState); - assert(info->pViewportState); - assert(info->pRasterizationState); - - if (subpass && subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) - assert(info->pDepthStencilState); - - if (subpass && subpass->color_count > 0) - assert(info->pColorBlendState); - - for (uint32_t i = 0; i < info->stageCount; ++i) { - switch (info->pStages[i].stage) { - case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: - case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: - assert(info->pTessellationState); - break; - default: - break; - } - } -} - -VkResult -anv_pipeline_init(struct anv_pipeline *pipeline, - struct anv_device *device, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *alloc) -{ - VkResult result; - - anv_validate { - anv_pipeline_validate_create_info(pCreateInfo); - } - - if (alloc == NULL) - alloc = &device->alloc; - - pipeline->device = device; - pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); - - result = anv_reloc_list_init(&pipeline->batch_relocs, alloc); - if (result != VK_SUCCESS) - return result; - - pipeline->batch.alloc = alloc; - pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; - pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); - pipeline->batch.relocs = &pipeline->batch_relocs; - - anv_pipeline_init_dynamic_state(pipeline, pCreateInfo); - - if (pCreateInfo->pTessellationState) - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); - - pipeline->use_repclear = extra && extra->use_repclear; - pipeline->writes_point_size = false; - - /* When we free the pipeline, we detect stages based on the NULL status - * of various prog_data pointers. Make them NULL by default. 
- */ - memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); - memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); - memset(pipeline->bindings, 0, sizeof(pipeline->bindings)); - - pipeline->vs_simd8 = NO_KERNEL; - pipeline->vs_vec4 = NO_KERNEL; - pipeline->gs_kernel = NO_KERNEL; - pipeline->ps_ksp0 = NO_KERNEL; - - pipeline->active_stages = 0; - pipeline->total_scratch = 0; - - for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { - ANV_FROM_HANDLE(anv_shader_module, module, - pCreateInfo->pStages[i].module); - - switch (pCreateInfo->pStages[i].stage) { - case VK_SHADER_STAGE_VERTEX_BIT: - anv_pipeline_compile_vs(pipeline, cache, pCreateInfo, module, - pCreateInfo->pStages[i].pName, - pCreateInfo->pStages[i].pSpecializationInfo); - break; - case VK_SHADER_STAGE_GEOMETRY_BIT: - anv_pipeline_compile_gs(pipeline, cache, pCreateInfo, module, - pCreateInfo->pStages[i].pName, - pCreateInfo->pStages[i].pSpecializationInfo); - break; - case VK_SHADER_STAGE_FRAGMENT_BIT: - anv_pipeline_compile_fs(pipeline, cache, pCreateInfo, extra, module, - pCreateInfo->pStages[i].pName, - pCreateInfo->pStages[i].pSpecializationInfo); - break; - default: - anv_finishme("Unsupported shader stage"); - } - } - - if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) { - /* Vertex is only optional if disable_vs is set */ - assert(extra->disable_vs); - memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); - } - - gen7_compute_urb_partition(pipeline); - - const VkPipelineVertexInputStateCreateInfo *vi_info = - pCreateInfo->pVertexInputState; - - uint64_t inputs_read; - if (extra && extra->disable_vs) { - /* If the VS is disabled, just assume the user knows what they're - * doing and apply the layout blindly. This can only come from - * meta, so this *should* be safe. - */ - inputs_read = ~0ull; - } else { - inputs_read = pipeline->vs_prog_data.inputs_read; - } - - pipeline->vb_used = 0; - for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { - const VkVertexInputAttributeDescription *desc = - &vi_info->pVertexAttributeDescriptions[i]; - - if (inputs_read & (1 << (VERT_ATTRIB_GENERIC0 + desc->location))) - pipeline->vb_used |= 1 << desc->binding; - } - - for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { - const VkVertexInputBindingDescription *desc = - &vi_info->pVertexBindingDescriptions[i]; - - pipeline->binding_stride[desc->binding] = desc->stride; - - /* Step rate is programmed per vertex element (attribute), not - * binding. Set up a map of which bindings step per instance, for - * reference by vertex element setup. 
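[Aside on the vertex-input loops here] These loops boil the API description down to three per-binding facts: whether any attribute the VS actually reads comes from the binding (vb_used), its stride, and whether it advances per instance. An application-side sketch that would mark binding 1 as instanced; formats, strides and locations are arbitrary example values.

#include <vulkan/vulkan.h>

static const VkVertexInputBindingDescription bindings[] = {
   { .binding = 0, .stride = 16, .inputRate = VK_VERTEX_INPUT_RATE_VERTEX },
   { .binding = 1, .stride = 16, .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE },
};

static const VkVertexInputAttributeDescription attributes[] = {
   { .location = 0, .binding = 0, .format = VK_FORMAT_R32G32B32A32_SFLOAT, .offset = 0 },
   { .location = 1, .binding = 1, .format = VK_FORMAT_R32G32B32A32_SFLOAT, .offset = 0 },
};

static const VkPipelineVertexInputStateCreateInfo vi_info = {
   .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
   .vertexBindingDescriptionCount = 2,
   .pVertexBindingDescriptions = bindings,
   .vertexAttributeDescriptionCount = 2,
   .pVertexAttributeDescriptions = attributes,
};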
*/ - switch (desc->inputRate) { - default: - case VK_VERTEX_INPUT_RATE_VERTEX: - pipeline->instancing_enable[desc->binding] = false; - break; - case VK_VERTEX_INPUT_RATE_INSTANCE: - pipeline->instancing_enable[desc->binding] = true; - break; - } - } - - const VkPipelineInputAssemblyStateCreateInfo *ia_info = - pCreateInfo->pInputAssemblyState; - pipeline->primitive_restart = ia_info->primitiveRestartEnable; - pipeline->topology = vk_to_gen_primitive_type[ia_info->topology]; - - if (extra && extra->use_rectlist) - pipeline->topology = _3DPRIM_RECTLIST; - - while (anv_block_pool_size(&device->scratch_block_pool) < - pipeline->total_scratch) - anv_block_pool_alloc(&device->scratch_block_pool); - - return VK_SUCCESS; -} - -VkResult -anv_graphics_pipeline_create( - VkDevice _device, - VkPipelineCache _cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *pAllocator, - VkPipeline *pPipeline) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); - - if (cache == NULL) - cache = &device->default_pipeline_cache; - - switch (device->info.gen) { - case 7: - if (device->info.is_haswell) - return gen75_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); - else - return gen7_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); - case 8: - return gen8_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); - case 9: - return gen9_graphics_pipeline_create(_device, cache, pCreateInfo, extra, pAllocator, pPipeline); - default: - unreachable("unsupported gen\n"); - } -} - -VkResult anv_CreateGraphicsPipelines( - VkDevice _device, - VkPipelineCache pipelineCache, - uint32_t count, - const VkGraphicsPipelineCreateInfo* pCreateInfos, - const VkAllocationCallbacks* pAllocator, - VkPipeline* pPipelines) -{ - VkResult result = VK_SUCCESS; - - unsigned i = 0; - for (; i < count; i++) { - result = anv_graphics_pipeline_create(_device, - pipelineCache, - &pCreateInfos[i], - NULL, pAllocator, &pPipelines[i]); - if (result != VK_SUCCESS) { - for (unsigned j = 0; j < i; j++) { - anv_DestroyPipeline(_device, pPipelines[j], pAllocator); - } - - return result; - } - } - - return VK_SUCCESS; -} - -static VkResult anv_compute_pipeline_create( - VkDevice _device, - VkPipelineCache _cache, - const VkComputePipelineCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkPipeline* pPipeline) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); - - if (cache == NULL) - cache = &device->default_pipeline_cache; - - switch (device->info.gen) { - case 7: - if (device->info.is_haswell) - return gen75_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); - else - return gen7_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); - case 8: - return gen8_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); - case 9: - return gen9_compute_pipeline_create(_device, cache, pCreateInfo, pAllocator, pPipeline); - default: - unreachable("unsupported gen\n"); - } -} - -VkResult anv_CreateComputePipelines( - VkDevice _device, - VkPipelineCache pipelineCache, - uint32_t count, - const VkComputePipelineCreateInfo* pCreateInfos, - const VkAllocationCallbacks* pAllocator, - VkPipeline* pPipelines) -{ - VkResult result = VK_SUCCESS; - - unsigned i = 0; - for (; i < count; i++) { - 
result = anv_compute_pipeline_create(_device, pipelineCache, - &pCreateInfos[i], - pAllocator, &pPipelines[i]); - if (result != VK_SUCCESS) { - for (unsigned j = 0; j < i; j++) { - anv_DestroyPipeline(_device, pPipelines[j], pAllocator); - } - - return result; - } - } - - return VK_SUCCESS; -} diff --git a/src/vulkan/anv_pipeline_cache.c b/src/vulkan/anv_pipeline_cache.c deleted file mode 100644 index c89bb2a..0000000 --- a/src/vulkan/anv_pipeline_cache.c +++ /dev/null @@ -1,405 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "util/mesa-sha1.h" -#include "util/debug.h" -#include "anv_private.h" - -/* Remaining work: - * - * - Compact binding table layout so it's tight and not dependent on - * descriptor set layout. - * - * - Review prog_data struct for size and cacheability: struct - * brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8 - * bit quantities etc; param, pull_param, and image_params are pointers, we - * just need the compation map. use bit fields for all bools, eg - * dual_src_blend. - */ - -void -anv_pipeline_cache_init(struct anv_pipeline_cache *cache, - struct anv_device *device) -{ - cache->device = device; - anv_state_stream_init(&cache->program_stream, - &device->instruction_block_pool); - pthread_mutex_init(&cache->mutex, NULL); - - cache->kernel_count = 0; - cache->total_size = 0; - cache->table_size = 1024; - const size_t byte_size = cache->table_size * sizeof(cache->table[0]); - cache->table = malloc(byte_size); - - /* We don't consider allocation failure fatal, we just start with a 0-sized - * cache. 
*/ - if (cache->table == NULL) - cache->table_size = 0; - else - memset(cache->table, 0xff, byte_size); -} - -void -anv_pipeline_cache_finish(struct anv_pipeline_cache *cache) -{ - anv_state_stream_finish(&cache->program_stream); - pthread_mutex_destroy(&cache->mutex); - free(cache->table); -} - -struct cache_entry { - unsigned char sha1[20]; - uint32_t prog_data_size; - uint32_t kernel_size; - char prog_data[0]; - - /* kernel follows prog_data at next 64 byte aligned address */ -}; - -void -anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, - struct anv_shader_module *module, - const char *entrypoint, - const VkSpecializationInfo *spec_info) -{ - struct mesa_sha1 *ctx; - - ctx = _mesa_sha1_init(); - _mesa_sha1_update(ctx, &key, sizeof(key)); - _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1)); - _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint)); - /* hash in shader stage, pipeline layout? */ - if (spec_info) { - _mesa_sha1_update(ctx, spec_info->pMapEntries, - spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]); - _mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize); - } - _mesa_sha1_final(ctx, hash); -} - -uint32_t -anv_pipeline_cache_search(struct anv_pipeline_cache *cache, - const unsigned char *sha1, void *prog_data) -{ - const uint32_t mask = cache->table_size - 1; - const uint32_t start = (*(uint32_t *) sha1); - - for (uint32_t i = 0; i < cache->table_size; i++) { - const uint32_t index = (start + i) & mask; - const uint32_t offset = cache->table[index]; - - if (offset == ~0) - return NO_KERNEL; - - struct cache_entry *entry = - cache->program_stream.block_pool->map + offset; - if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) { - if (prog_data) - memcpy(prog_data, entry->prog_data, entry->prog_data_size); - - const uint32_t preamble_size = - align_u32(sizeof(*entry) + entry->prog_data_size, 64); - - return offset + preamble_size; - } - } - - return NO_KERNEL; -} - -static void -anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache, - struct cache_entry *entry, uint32_t entry_offset) -{ - const uint32_t mask = cache->table_size - 1; - const uint32_t start = (*(uint32_t *) entry->sha1); - - /* We'll always be able to insert when we get here. */ - assert(cache->kernel_count < cache->table_size / 2); - - for (uint32_t i = 0; i < cache->table_size; i++) { - const uint32_t index = (start + i) & mask; - if (cache->table[index] == ~0) { - cache->table[index] = entry_offset; - break; - } - } - - /* We don't include the alignment padding bytes when we serialize, so - * don't include taht in the the total size. 
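[Aside on the cache table above] The table probed above is a simple open-addressed hash: the probe starts at the first 32 bits of the shader's SHA-1, advances linearly, and ~0 marks an empty slot (hence the memset to 0xff). A minimal standalone version storing plain offsets instead of program-stream offsets; TABLE_SIZE and the names are made up.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TABLE_SIZE 16u                      /* must stay a power of two for the & mask trick */
#define EMPTY      (~0u)

static uint32_t table[TABLE_SIZE];

static void insert(const unsigned char sha1[20], uint32_t offset)
{
   uint32_t start;
   memcpy(&start, sha1, sizeof(start));     /* first 32 bits of the SHA-1 pick the start slot */
   for (uint32_t i = 0; i < TABLE_SIZE; i++) {
      uint32_t idx = (start + i) & (TABLE_SIZE - 1u);
      if (table[idx] == EMPTY) {
         table[idx] = offset;
         return;
      }
   }
}

int main(void)
{
   memset(table, 0xff, sizeof(table));      /* 0xff bytes == ~0u == every slot empty */

   unsigned char sha1[20] = { 0x2a };       /* pretend this came from anv_hash_shader() */
   insert(sha1, 4096);

   uint32_t start;
   memcpy(&start, sha1, sizeof(start));
   printf("slot %u holds offset %u\n",
          start & (TABLE_SIZE - 1u), table[start & (TABLE_SIZE - 1u)]);
   return 0;
}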
*/ - cache->total_size += - sizeof(*entry) + entry->prog_data_size + entry->kernel_size; - cache->kernel_count++; -} - -static VkResult -anv_pipeline_cache_grow(struct anv_pipeline_cache *cache) -{ - const uint32_t table_size = cache->table_size * 2; - const uint32_t old_table_size = cache->table_size; - const size_t byte_size = table_size * sizeof(cache->table[0]); - uint32_t *table; - uint32_t *old_table = cache->table; - - table = malloc(byte_size); - if (table == NULL) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - cache->table = table; - cache->table_size = table_size; - cache->kernel_count = 0; - cache->total_size = 0; - - memset(cache->table, 0xff, byte_size); - for (uint32_t i = 0; i < old_table_size; i++) { - const uint32_t offset = old_table[i]; - if (offset == ~0) - continue; - - struct cache_entry *entry = - cache->program_stream.block_pool->map + offset; - anv_pipeline_cache_add_entry(cache, entry, offset); - } - - free(old_table); - - return VK_SUCCESS; -} - -uint32_t -anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, - const unsigned char *sha1, - const void *kernel, size_t kernel_size, - const void *prog_data, size_t prog_data_size) -{ - pthread_mutex_lock(&cache->mutex); - struct cache_entry *entry; - - /* Meta pipelines don't have SPIR-V, so we can't hash them. - * Consequentally, they just don't get cached. - */ - const uint32_t preamble_size = sha1 ? - align_u32(sizeof(*entry) + prog_data_size, 64) : - 0; - - const uint32_t size = preamble_size + kernel_size; - - assert(size < cache->program_stream.block_pool->block_size); - const struct anv_state state = - anv_state_stream_alloc(&cache->program_stream, size, 64); - - if (sha1 && env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", false)) { - assert(anv_pipeline_cache_search(cache, sha1, NULL) == NO_KERNEL); - entry = state.map; - memcpy(entry->sha1, sha1, sizeof(entry->sha1)); - entry->prog_data_size = prog_data_size; - memcpy(entry->prog_data, prog_data, prog_data_size); - entry->kernel_size = kernel_size; - - if (cache->kernel_count == cache->table_size / 2) - anv_pipeline_cache_grow(cache); - - /* Failing to grow that hash table isn't fatal, but may mean we don't - * have enough space to add this new kernel. Only add it if there's room. - */ - if (cache->kernel_count < cache->table_size / 2) - anv_pipeline_cache_add_entry(cache, entry, state.offset); - } - - pthread_mutex_unlock(&cache->mutex); - - memcpy(state.map + preamble_size, kernel, kernel_size); - - if (!cache->device->info.has_llc) - anv_state_clflush(state); - - return state.offset + preamble_size; -} - -static void -anv_pipeline_cache_load(struct anv_pipeline_cache *cache, - const void *data, size_t size) -{ - struct anv_device *device = cache->device; - uint8_t uuid[VK_UUID_SIZE]; - struct { - uint32_t device_id; - uint8_t uuid[VK_UUID_SIZE]; - } header; - - if (size < sizeof(header)) - return; - memcpy(&header, data, sizeof(header)); - if (header.device_id != device->chipset_id) - return; - anv_device_get_cache_uuid(uuid); - if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0) - return; - - const void *end = data + size; - const void *p = data + sizeof(header); - - while (p < end) { - /* The kernels aren't 64 byte aligned in the serialized format so - * they're always right after the prog_data. 
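[Aside on the serialized cache layout] The blob read back by anv_pipeline_cache_load() below (and produced by anv_GetPipelineCacheData() further down) is a small header followed by densely packed entries, each kernel sitting immediately behind its prog_data with none of the in-memory 64-byte alignment. A sketch of a walker over that layout; blob_header, blob_entry and the callback are illustrative stand-ins, not driver types.

#include <stddef.h>
#include <stdint.h>

struct blob_header { uint32_t device_id; uint8_t uuid[16 /* VK_UUID_SIZE */]; };
struct blob_entry  { unsigned char sha1[20]; uint32_t prog_data_size; uint32_t kernel_size; };

static void
walk_cache_blob(const void *data, size_t size,
                void (*entry_cb)(const struct blob_entry *entry,
                                 const void *prog_data, const void *kernel))
{
   if (size < sizeof(struct blob_header))
      return;                               /* too small to even hold the header */

   const char *p   = (const char *)data + sizeof(struct blob_header);
   const char *end = (const char *)data + size;

   while (p < end) {
      const struct blob_entry *e = (const struct blob_entry *)p;
      const char *prog_data = p + sizeof(*e);
      const char *kernel    = prog_data + e->prog_data_size;   /* unaligned, right behind prog_data */
      entry_cb(e, prog_data, kernel);
      p = kernel + e->kernel_size;
   }
}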
- */ - const struct cache_entry *entry = p; - const void *kernel = &entry->prog_data[entry->prog_data_size]; - - anv_pipeline_cache_upload_kernel(cache, entry->sha1, - kernel, entry->kernel_size, - entry->prog_data, entry->prog_data_size); - p = kernel + entry->kernel_size; - } -} - -VkResult anv_CreatePipelineCache( - VkDevice _device, - const VkPipelineCacheCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkPipelineCache* pPipelineCache) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_pipeline_cache *cache; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); - assert(pCreateInfo->flags == 0); - - cache = anv_alloc2(&device->alloc, pAllocator, - sizeof(*cache), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (cache == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - anv_pipeline_cache_init(cache, device); - - if (pCreateInfo->initialDataSize > 0) - anv_pipeline_cache_load(cache, - pCreateInfo->pInitialData, - pCreateInfo->initialDataSize); - - *pPipelineCache = anv_pipeline_cache_to_handle(cache); - - return VK_SUCCESS; -} - -void anv_DestroyPipelineCache( - VkDevice _device, - VkPipelineCache _cache, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); - - anv_pipeline_cache_finish(cache); - - anv_free2(&device->alloc, pAllocator, cache); -} - -VkResult anv_GetPipelineCacheData( - VkDevice _device, - VkPipelineCache _cache, - size_t* pDataSize, - void* pData) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); - - const size_t size = 4 + VK_UUID_SIZE + cache->total_size; - - if (pData == NULL) { - *pDataSize = size; - return VK_SUCCESS; - } - - if (*pDataSize < size) { - *pDataSize = 0; - return VK_INCOMPLETE; - } - - void *p = pData; - memcpy(p, &device->chipset_id, sizeof(device->chipset_id)); - p += sizeof(device->chipset_id); - - anv_device_get_cache_uuid(p); - p += VK_UUID_SIZE; - - struct cache_entry *entry; - for (uint32_t i = 0; i < cache->table_size; i++) { - if (cache->table[i] == ~0) - continue; - - entry = cache->program_stream.block_pool->map + cache->table[i]; - - memcpy(p, entry, sizeof(*entry) + entry->prog_data_size); - p += sizeof(*entry) + entry->prog_data_size; - - void *kernel = (void *) entry + - align_u32(sizeof(*entry) + entry->prog_data_size, 64); - - memcpy(p, kernel, entry->kernel_size); - p += entry->kernel_size; - } - - return VK_SUCCESS; -} - -static void -anv_pipeline_cache_merge(struct anv_pipeline_cache *dst, - struct anv_pipeline_cache *src) -{ - for (uint32_t i = 0; i < src->table_size; i++) { - if (src->table[i] == ~0) - continue; - - struct cache_entry *entry = - src->program_stream.block_pool->map + src->table[i]; - - if (anv_pipeline_cache_search(dst, entry->sha1, NULL) != NO_KERNEL) - continue; - - const void *kernel = (void *) entry + - align_u32(sizeof(*entry) + entry->prog_data_size, 64); - anv_pipeline_cache_upload_kernel(dst, entry->sha1, - kernel, entry->kernel_size, - entry->prog_data, entry->prog_data_size); - } -} - -VkResult anv_MergePipelineCaches( - VkDevice _device, - VkPipelineCache destCache, - uint32_t srcCacheCount, - const VkPipelineCache* pSrcCaches) -{ - ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache); - - for (uint32_t i = 0; i < srcCacheCount; i++) { - ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]); - - anv_pipeline_cache_merge(dst, src); - } - - return VK_SUCCESS; -} diff --git 
a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h deleted file mode 100644 index ba86333..0000000 --- a/src/vulkan/anv_private.h +++ /dev/null @@ -1,1876 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#ifdef HAVE_VALGRIND -#include -#include -#define VG(x) x -#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x)) -#else -#define VG(x) -#endif - -#include "brw_device_info.h" -#include "util/macros.h" -#include "util/list.h" - -/* Pre-declarations needed for WSI entrypoints */ -struct wl_surface; -struct wl_display; -typedef struct xcb_connection_t xcb_connection_t; -typedef uint32_t xcb_visualid_t; -typedef uint32_t xcb_window_t; - -#define VK_USE_PLATFORM_XCB_KHR -#define VK_USE_PLATFORM_WAYLAND_KHR - -#define VK_PROTOTYPES -#include -#include -#include - -#include "anv_entrypoints.h" -#include "anv_gen_macros.h" -#include "brw_context.h" -#include "isl/isl.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MAX_VBS 32 -#define MAX_SETS 8 -#define MAX_RTS 8 -#define MAX_VIEWPORTS 16 -#define MAX_SCISSORS 16 -#define MAX_PUSH_CONSTANTS_SIZE 128 -#define MAX_DYNAMIC_BUFFERS 16 -#define MAX_IMAGES 8 -#define MAX_SAMPLES_LOG2 4 /* SKL supports 16 samples */ - -#define anv_noreturn __attribute__((__noreturn__)) -#define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) - -#define MIN(a, b) ((a) < (b) ? (a) : (b)) -#define MAX(a, b) ((a) > (b) ? (a) : (b)) - -static inline uint32_t -align_u32(uint32_t v, uint32_t a) -{ - assert(a != 0 && a == (a & -a)); - return (v + a - 1) & ~(a - 1); -} - -static inline uint64_t -align_u64(uint64_t v, uint64_t a) -{ - assert(a != 0 && a == (a & -a)); - return (v + a - 1) & ~(a - 1); -} - -static inline int32_t -align_i32(int32_t v, int32_t a) -{ - assert(a != 0 && a == (a & -a)); - return (v + a - 1) & ~(a - 1); -} - -/** Alignment must be a power of 2. 
*/ -static inline bool -anv_is_aligned(uintmax_t n, uintmax_t a) -{ - assert(a == (a & -a)); - return (n & (a - 1)) == 0; -} - -static inline uint32_t -anv_minify(uint32_t n, uint32_t levels) -{ - if (unlikely(n == 0)) - return 0; - else - return MAX(n >> levels, 1); -} - -static inline float -anv_clamp_f(float f, float min, float max) -{ - assert(min < max); - - if (f > max) - return max; - else if (f < min) - return min; - else - return f; -} - -static inline bool -anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask) -{ - if (*inout_mask & clear_mask) { - *inout_mask &= ~clear_mask; - return true; - } else { - return false; - } -} - -#define for_each_bit(b, dword) \ - for (uint32_t __dword = (dword); \ - (b) = __builtin_ffs(__dword) - 1, __dword; \ - __dword &= ~(1 << (b))) - -#define typed_memcpy(dest, src, count) ({ \ - static_assert(sizeof(*src) == sizeof(*dest), ""); \ - memcpy((dest), (src), (count) * sizeof(*(src))); \ -}) - -#define zero(x) (memset(&(x), 0, sizeof(x))) - -/* Define no kernel as 1, since that's an illegal offset for a kernel */ -#define NO_KERNEL 1 - -struct anv_common { - VkStructureType sType; - const void* pNext; -}; - -/* Whenever we generate an error, pass it through this function. Useful for - * debugging, where we can break on it. Only call at error site, not when - * propagating errors. Might be useful to plug in a stack trace here. - */ - -VkResult __vk_errorf(VkResult error, const char *file, int line, const char *format, ...); - -#ifdef DEBUG -#define vk_error(error) __vk_errorf(error, __FILE__, __LINE__, NULL); -#define vk_errorf(error, format, ...) __vk_errorf(error, __FILE__, __LINE__, format, ## __VA_ARGS__); -#else -#define vk_error(error) error -#define vk_errorf(error, format, ...) error -#endif - -void __anv_finishme(const char *file, int line, const char *format, ...) - anv_printflike(3, 4); -void anv_loge(const char *format, ...) anv_printflike(1, 2); -void anv_loge_v(const char *format, va_list va); - -/** - * Print a FINISHME message, including its source location. - */ -#define anv_finishme(format, ...) \ - __anv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); - -/* A non-fatal assert. Useful for debugging. */ -#ifdef DEBUG -#define anv_assert(x) ({ \ - if (unlikely(!(x))) \ - fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \ -}) -#else -#define anv_assert(x) -#endif - -/** - * If a block of code is annotated with anv_validate, then the block runs only - * in debug builds. - */ -#ifdef DEBUG -#define anv_validate if (1) -#else -#define anv_validate if (0) -#endif - -void anv_abortf(const char *format, ...) anv_noreturn anv_printflike(1, 2); -void anv_abortfv(const char *format, va_list va) anv_noreturn; - -#define stub_return(v) \ - do { \ - anv_finishme("stub %s", __func__); \ - return (v); \ - } while (0) - -#define stub() \ - do { \ - anv_finishme("stub %s", __func__); \ - return; \ - } while (0) - -/** - * A dynamically growable, circular buffer. Elements are added at head and - * removed from tail. head and tail are free-running uint32_t indices and we - * only compute the modulo with size when accessing the array. This way, - * number of bytes in the queue is always head - tail, even in case of - * wraparound. 
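[Aside on the free-running indices above] The head/tail scheme described above relies on two properties of uint32_t arithmetic: subtraction stays correct across wraparound, and a power-of-two size lets "index & (size - 1)" do the modulo. A tiny demonstration; the element and ring sizes are chosen only for illustration.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   const uint32_t element_size = 16, size = 64;

   uint32_t tail = UINT32_MAX - 15;             /* pretend we're about to wrap */
   uint32_t head = tail + 2 * element_size;     /* wraps past zero, which is fine */

   printf("bytes queued: %u\n", head - tail);                     /* 32, despite the wrap */
   printf("elements queued: %u\n", (head - tail) / element_size); /* 2 */
   printf("tail slot offset: %u\n", tail & (size - 1));
   printf("head slot offset: %u\n", head & (size - 1));
   return 0;
}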
- */ - -struct anv_vector { - uint32_t head; - uint32_t tail; - uint32_t element_size; - uint32_t size; - void *data; -}; - -int anv_vector_init(struct anv_vector *queue, uint32_t element_size, uint32_t size); -void *anv_vector_add(struct anv_vector *queue); -void *anv_vector_remove(struct anv_vector *queue); - -static inline int -anv_vector_length(struct anv_vector *queue) -{ - return (queue->head - queue->tail) / queue->element_size; -} - -static inline void * -anv_vector_head(struct anv_vector *vector) -{ - assert(vector->tail < vector->head); - return (void *)((char *)vector->data + - ((vector->head - vector->element_size) & - (vector->size - 1))); -} - -static inline void * -anv_vector_tail(struct anv_vector *vector) -{ - return (void *)((char *)vector->data + (vector->tail & (vector->size - 1))); -} - -static inline void -anv_vector_finish(struct anv_vector *queue) -{ - free(queue->data); -} - -#define anv_vector_foreach(elem, queue) \ - static_assert(__builtin_types_compatible_p(__typeof__(queue), struct anv_vector *), ""); \ - for (uint32_t __anv_vector_offset = (queue)->tail; \ - elem = (queue)->data + (__anv_vector_offset & ((queue)->size - 1)), __anv_vector_offset < (queue)->head; \ - __anv_vector_offset += (queue)->element_size) - -struct anv_bo { - uint32_t gem_handle; - - /* Index into the current validation list. This is used by the - * validation list building alrogithm to track which buffers are already - * in the validation list so that we can ensure uniqueness. - */ - uint32_t index; - - /* Last known offset. This value is provided by the kernel when we - * execbuf and is used as the presumed offset for the next bunch of - * relocations. - */ - uint64_t offset; - - uint64_t size; - void *map; - - /* We need to set the WRITE flag on winsys bos so GEM will know we're - * writing to them and synchronize uses on other rings (eg if the display - * server uses the blitter ring). - */ - bool is_winsys_bo; -}; - -/* Represents a lock-free linked list of "free" things. This is used by - * both the block pool and the state pools. Unfortunately, in order to - * solve the ABA problem, we can't use a single uint32_t head. - */ -union anv_free_list { - struct { - int32_t offset; - - /* A simple count that is incremented every time the head changes. */ - uint32_t count; - }; - uint64_t u64; -}; - -#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { 1, 0 } }) - -struct anv_block_state { - union { - struct { - uint32_t next; - uint32_t end; - }; - uint64_t u64; - }; -}; - -struct anv_block_pool { - struct anv_device *device; - - struct anv_bo bo; - - /* The offset from the start of the bo to the "center" of the block - * pool. Pointers to allocated blocks are given by - * bo.map + center_bo_offset + offsets. - */ - uint32_t center_bo_offset; - - /* Current memory map of the block pool. This pointer may or may not - * point to the actual beginning of the block pool memory. If - * anv_block_pool_alloc_back has ever been called, then this pointer - * will point to the "center" position of the buffer and all offsets - * (negative or positive) given out by the block pool alloc functions - * will be valid relative to this pointer. - * - * In particular, map == bo.map + center_offset - */ - void *map; - int fd; - - /** - * Array of mmaps and gem handles owned by the block pool, reclaimed when - * the block pool is destroyed. 
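[Aside on the free list above] union anv_free_list pairs the head offset with a change counter so the whole head fits in one 64-bit word; pushing or popping is then a single compare-and-swap, and the counter is what defeats the ABA problem the comment mentions. A minimal push sketch using the GCC/Clang __sync builtin; the driver's real lock-free helpers live in anv_allocator.c and differ in detail.

#include <stdint.h>
#include <stdio.h>

union free_list {
   struct { int32_t offset; uint32_t count; };
   uint64_t u64;
};

static void
free_list_push(union free_list *list, int32_t offset, int32_t *link_storage)
{
   union free_list current, new_head;
   do {
      current.u64 = list->u64;
      *link_storage  = current.offset;          /* new node points at the old head */
      new_head.offset = offset;
      new_head.count  = current.count + 1;      /* bump the ABA counter */
   } while (!__sync_bool_compare_and_swap(&list->u64, current.u64, new_head.u64));
}

int main(void)
{
   union free_list list = { { 1, 0 } };          /* "empty", like ANV_FREE_LIST_EMPTY */
   int32_t link;

   free_list_push(&list, 4096, &link);
   printf("head offset %d, count %u, old head %d\n", list.offset, list.count, link);
   return 0;
}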
- */ - struct anv_vector mmap_cleanups; - - uint32_t block_size; - - union anv_free_list free_list; - struct anv_block_state state; - - union anv_free_list back_free_list; - struct anv_block_state back_state; -}; - -/* Block pools are backed by a fixed-size 2GB memfd */ -#define BLOCK_POOL_MEMFD_SIZE (1ull << 32) - -/* The center of the block pool is also the middle of the memfd. This may - * change in the future if we decide differently for some reason. - */ -#define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2) - -static inline uint32_t -anv_block_pool_size(struct anv_block_pool *pool) -{ - return pool->state.end + pool->back_state.end; -} - -struct anv_state { - int32_t offset; - uint32_t alloc_size; - void *map; -}; - -struct anv_fixed_size_state_pool { - size_t state_size; - union anv_free_list free_list; - struct anv_block_state block; -}; - -#define ANV_MIN_STATE_SIZE_LOG2 6 -#define ANV_MAX_STATE_SIZE_LOG2 10 - -#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2) - -struct anv_state_pool { - struct anv_block_pool *block_pool; - struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS]; -}; - -struct anv_state_stream_block; - -struct anv_state_stream { - struct anv_block_pool *block_pool; - - /* The current working block */ - struct anv_state_stream_block *block; - - /* Offset at which the current block starts */ - uint32_t start; - /* Offset at which to allocate the next state */ - uint32_t next; - /* Offset at which the current block ends */ - uint32_t end; -}; - -#define CACHELINE_SIZE 64 -#define CACHELINE_MASK 63 - -static inline void -anv_clflush_range(void *start, size_t size) -{ - void *p = (void *) (((uintptr_t) start) & ~CACHELINE_MASK); - void *end = start + size; - - __builtin_ia32_mfence(); - while (p < end) { - __builtin_ia32_clflush(p); - p += CACHELINE_SIZE; - } -} - -static void inline -anv_state_clflush(struct anv_state state) -{ - anv_clflush_range(state.map, state.alloc_size); -} - -void anv_block_pool_init(struct anv_block_pool *pool, - struct anv_device *device, uint32_t block_size); -void anv_block_pool_finish(struct anv_block_pool *pool); -int32_t anv_block_pool_alloc(struct anv_block_pool *pool); -int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool); -void anv_block_pool_free(struct anv_block_pool *pool, int32_t offset); -void anv_state_pool_init(struct anv_state_pool *pool, - struct anv_block_pool *block_pool); -void anv_state_pool_finish(struct anv_state_pool *pool); -struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool, - size_t state_size, size_t alignment); -void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state); -void anv_state_stream_init(struct anv_state_stream *stream, - struct anv_block_pool *block_pool); -void anv_state_stream_finish(struct anv_state_stream *stream); -struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream, - uint32_t size, uint32_t alignment); - -/** - * Implements a pool of re-usable BOs. The interface is identical to that - * of block_pool except that each block is its own BO. 
- */ -struct anv_bo_pool { - struct anv_device *device; - - uint32_t bo_size; - - void *free_list; -}; - -void anv_bo_pool_init(struct anv_bo_pool *pool, - struct anv_device *device, uint32_t block_size); -void anv_bo_pool_finish(struct anv_bo_pool *pool); -VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo); -void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo); - - -void *anv_resolve_entrypoint(uint32_t index); - -extern struct anv_dispatch_table dtable; - -#define ANV_CALL(func) ({ \ - if (dtable.func == NULL) { \ - size_t idx = offsetof(struct anv_dispatch_table, func) / sizeof(void *); \ - dtable.entrypoints[idx] = anv_resolve_entrypoint(idx); \ - } \ - dtable.func; \ -}) - -static inline void * -anv_alloc(const VkAllocationCallbacks *alloc, - size_t size, size_t align, - VkSystemAllocationScope scope) -{ - return alloc->pfnAllocation(alloc->pUserData, size, align, scope); -} - -static inline void * -anv_realloc(const VkAllocationCallbacks *alloc, - void *ptr, size_t size, size_t align, - VkSystemAllocationScope scope) -{ - return alloc->pfnReallocation(alloc->pUserData, ptr, size, align, scope); -} - -static inline void -anv_free(const VkAllocationCallbacks *alloc, void *data) -{ - alloc->pfnFree(alloc->pUserData, data); -} - -static inline void * -anv_alloc2(const VkAllocationCallbacks *parent_alloc, - const VkAllocationCallbacks *alloc, - size_t size, size_t align, - VkSystemAllocationScope scope) -{ - if (alloc) - return anv_alloc(alloc, size, align, scope); - else - return anv_alloc(parent_alloc, size, align, scope); -} - -static inline void -anv_free2(const VkAllocationCallbacks *parent_alloc, - const VkAllocationCallbacks *alloc, - void *data) -{ - if (alloc) - anv_free(alloc, data); - else - anv_free(parent_alloc, data); -} - -struct anv_physical_device { - VK_LOADER_DATA _loader_data; - - struct anv_instance * instance; - uint32_t chipset_id; - const char * path; - const char * name; - const struct brw_device_info * info; - uint64_t aperture_size; - struct brw_compiler * compiler; - struct isl_device isl_dev; -}; - -struct anv_wsi_interaface; - -#define VK_ICD_WSI_PLATFORM_MAX 5 - -struct anv_instance { - VK_LOADER_DATA _loader_data; - - VkAllocationCallbacks alloc; - - uint32_t apiVersion; - int physicalDeviceCount; - struct anv_physical_device physicalDevice; - - struct anv_wsi_interface * wsi[VK_ICD_WSI_PLATFORM_MAX]; -}; - -VkResult anv_init_wsi(struct anv_instance *instance); -void anv_finish_wsi(struct anv_instance *instance); - -struct anv_meta_state { - VkAllocationCallbacks alloc; - - /** - * Use array element `i` for images with `2^i` samples. - */ - struct { - /** - * Pipeline N is used to clear color attachment N of the current - * subpass. - * - * HACK: We use one pipeline per color attachment to work around the - * compiler's inability to dynamically set the render target index of - * the render target write message. - */ - struct anv_pipeline *color_pipelines[MAX_RTS]; - - struct anv_pipeline *depth_only_pipeline; - struct anv_pipeline *stencil_only_pipeline; - struct anv_pipeline *depthstencil_pipeline; - } clear[1 + MAX_SAMPLES_LOG2]; - - struct { - VkRenderPass render_pass; - - /** Pipeline that blits from a 1D image. */ - VkPipeline pipeline_1d_src; - - /** Pipeline that blits from a 2D image. */ - VkPipeline pipeline_2d_src; - - /** Pipeline that blits from a 3D image. 
*/ - VkPipeline pipeline_3d_src; - - VkPipelineLayout pipeline_layout; - VkDescriptorSetLayout ds_layout; - } blit; - - struct { - /** Pipeline [i] resolves an image with 2^(i+1) samples. */ - VkPipeline pipelines[MAX_SAMPLES_LOG2]; - - VkRenderPass pass; - VkPipelineLayout pipeline_layout; - VkDescriptorSetLayout ds_layout; - } resolve; -}; - -struct anv_queue { - VK_LOADER_DATA _loader_data; - - struct anv_device * device; - - struct anv_state_pool * pool; -}; - -struct anv_pipeline_cache { - struct anv_device * device; - struct anv_state_stream program_stream; - pthread_mutex_t mutex; - - uint32_t total_size; - uint32_t table_size; - uint32_t kernel_count; - uint32_t *table; -}; - -void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, - struct anv_device *device); -void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache); -uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache, - const unsigned char *sha1, void *prog_data); -uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, - const unsigned char *sha1, - const void *kernel, - size_t kernel_size, - const void *prog_data, - size_t prog_data_size); - -struct anv_device { - VK_LOADER_DATA _loader_data; - - VkAllocationCallbacks alloc; - - struct anv_instance * instance; - uint32_t chipset_id; - struct brw_device_info info; - struct isl_device isl_dev; - int context_id; - int fd; - - struct anv_bo_pool batch_bo_pool; - - struct anv_block_pool dynamic_state_block_pool; - struct anv_state_pool dynamic_state_pool; - - struct anv_block_pool instruction_block_pool; - struct anv_pipeline_cache default_pipeline_cache; - - struct anv_block_pool surface_state_block_pool; - struct anv_state_pool surface_state_pool; - - struct anv_bo workaround_bo; - - struct anv_meta_state meta_state; - - struct anv_state border_colors; - - struct anv_queue queue; - - struct anv_block_pool scratch_block_pool; - - pthread_mutex_t mutex; -}; - -VkResult gen7_init_device_state(struct anv_device *device); -VkResult gen75_init_device_state(struct anv_device *device); -VkResult gen8_init_device_state(struct anv_device *device); -VkResult gen9_init_device_state(struct anv_device *device); - -void anv_device_get_cache_uuid(void *uuid); - - -void* anv_gem_mmap(struct anv_device *device, - uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags); -void anv_gem_munmap(void *p, uint64_t size); -uint32_t anv_gem_create(struct anv_device *device, size_t size); -void anv_gem_close(struct anv_device *device, uint32_t gem_handle); -uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size); -int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns); -int anv_gem_execbuffer(struct anv_device *device, - struct drm_i915_gem_execbuffer2 *execbuf); -int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle, - uint32_t stride, uint32_t tiling); -int anv_gem_create_context(struct anv_device *device); -int anv_gem_destroy_context(struct anv_device *device, int context); -int anv_gem_get_param(int fd, uint32_t param); -bool anv_gem_get_bit6_swizzle(int fd, uint32_t tiling); -int anv_gem_get_aperture(int fd, uint64_t *size); -int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle); -uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd); -int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching); -int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle, - uint32_t read_domains, uint32_t 
write_domain); - -VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size); - -struct anv_reloc_list { - size_t num_relocs; - size_t array_length; - struct drm_i915_gem_relocation_entry * relocs; - struct anv_bo ** reloc_bos; -}; - -VkResult anv_reloc_list_init(struct anv_reloc_list *list, - const VkAllocationCallbacks *alloc); -void anv_reloc_list_finish(struct anv_reloc_list *list, - const VkAllocationCallbacks *alloc); - -uint64_t anv_reloc_list_add(struct anv_reloc_list *list, - const VkAllocationCallbacks *alloc, - uint32_t offset, struct anv_bo *target_bo, - uint32_t delta); - -struct anv_batch_bo { - /* Link in the anv_cmd_buffer.owned_batch_bos list */ - struct list_head link; - - struct anv_bo bo; - - /* Bytes actually consumed in this batch BO */ - size_t length; - - /* Last seen surface state block pool bo offset */ - uint32_t last_ss_pool_bo_offset; - - struct anv_reloc_list relocs; -}; - -struct anv_batch { - const VkAllocationCallbacks * alloc; - - void * start; - void * end; - void * next; - - struct anv_reloc_list * relocs; - - /* This callback is called (with the associated user data) in the event - * that the batch runs out of space. - */ - VkResult (*extend_cb)(struct anv_batch *, void *); - void * user_data; -}; - -void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords); -void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other); -uint64_t anv_batch_emit_reloc(struct anv_batch *batch, - void *location, struct anv_bo *bo, uint32_t offset); -VkResult anv_device_submit_simple_batch(struct anv_device *device, - struct anv_batch *batch); - -struct anv_address { - struct anv_bo *bo; - uint32_t offset; -}; - -#define __gen_address_type struct anv_address -#define __gen_user_data struct anv_batch - -static inline uint64_t -__gen_combine_address(struct anv_batch *batch, void *location, - const struct anv_address address, uint32_t delta) -{ - if (address.bo == NULL) { - return address.offset + delta; - } else { - assert(batch->start <= location && location < batch->end); - - return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta); - } -} - -/* Wrapper macros needed to work around preprocessor argument issues. In - * particular, arguments don't get pre-evaluated if they are concatenated. - * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the - * GENX macro won't get evaluated if the emit macro contains "cmd ## foo". - * We can work around this easily enough with these helpers. - */ -#define __anv_cmd_length(cmd) cmd ## _length -#define __anv_cmd_length_bias(cmd) cmd ## _length_bias -#define __anv_cmd_header(cmd) cmd ## _header -#define __anv_cmd_pack(cmd) cmd ## _pack - -#define anv_batch_emit(batch, cmd, ...) do { \ - void *__dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \ - struct cmd __template = { \ - __anv_cmd_header(cmd), \ - __VA_ARGS__ \ - }; \ - __anv_cmd_pack(cmd)(batch, __dst, &__template); \ - VG(VALGRIND_CHECK_MEM_IS_DEFINED(__dst, __anv_cmd_length(cmd) * 4)); \ - } while (0) - -#define anv_batch_emitn(batch, n, cmd, ...) 
({ \ - void *__dst = anv_batch_emit_dwords(batch, n); \ - struct cmd __template = { \ - __anv_cmd_header(cmd), \ - .DWordLength = n - __anv_cmd_length_bias(cmd), \ - __VA_ARGS__ \ - }; \ - __anv_cmd_pack(cmd)(batch, __dst, &__template); \ - __dst; \ - }) - -#define anv_batch_emit_merge(batch, dwords0, dwords1) \ - do { \ - uint32_t *dw; \ - \ - static_assert(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1), "mismatch merge"); \ - dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0)); \ - for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++) \ - dw[i] = (dwords0)[i] | (dwords1)[i]; \ - VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\ - } while (0) - -#define anv_state_pool_emit(pool, cmd, align, ...) ({ \ - const uint32_t __size = __anv_cmd_length(cmd) * 4; \ - struct anv_state __state = \ - anv_state_pool_alloc((pool), __size, align); \ - struct cmd __template = { \ - __VA_ARGS__ \ - }; \ - __anv_cmd_pack(cmd)(NULL, __state.map, &__template); \ - VG(VALGRIND_CHECK_MEM_IS_DEFINED(__state.map, __anv_cmd_length(cmd) * 4)); \ - if (!(pool)->block_pool->device->info.has_llc) \ - anv_state_clflush(__state); \ - __state; \ - }) - -#define GEN7_MOCS (struct GEN7_MEMORY_OBJECT_CONTROL_STATE) { \ - .GraphicsDataTypeGFDT = 0, \ - .LLCCacheabilityControlLLCCC = 0, \ - .L3CacheabilityControlL3CC = 1, \ -} - -#define GEN75_MOCS (struct GEN75_MEMORY_OBJECT_CONTROL_STATE) { \ - .LLCeLLCCacheabilityControlLLCCC = 0, \ - .L3CacheabilityControlL3CC = 1, \ -} - -#define GEN8_MOCS { \ - .MemoryTypeLLCeLLCCacheabilityControl = WB, \ - .TargetCache = L3DefertoPATforLLCeLLCselection, \ - .AgeforQUADLRU = 0 \ - } - -/* Skylake: MOCS is now an index into an array of 62 different caching - * configurations programmed by the kernel. - */ - -#define GEN9_MOCS { \ - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ - .IndextoMOCSTables = 2 \ - } - -#define GEN9_MOCS_PTE { \ - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ - .IndextoMOCSTables = 1 \ - } - -struct anv_device_memory { - struct anv_bo bo; - uint32_t type_index; - VkDeviceSize map_size; - void * map; -}; - -/** - * Header for Vertex URB Entry (VUE) - */ -struct anv_vue_header { - uint32_t Reserved; - uint32_t RTAIndex; /* RenderTargetArrayIndex */ - uint32_t ViewportIndex; - float PointWidth; -}; - -struct anv_descriptor_set_binding_layout { - /* Number of array elements in this binding */ - uint16_t array_size; - - /* Index into the flattend descriptor set */ - uint16_t descriptor_index; - - /* Index into the dynamic state array for a dynamic buffer */ - int16_t dynamic_offset_index; - - /* Index into the descriptor set buffer views */ - int16_t buffer_index; - - struct { - /* Index into the binding table for the associated surface */ - int16_t surface_index; - - /* Index into the sampler table for the associated sampler */ - int16_t sampler_index; - - /* Index into the image table for the associated image */ - int16_t image_index; - } stage[MESA_SHADER_STAGES]; - - /* Immutable samplers (or NULL if no immutable samplers) */ - struct anv_sampler **immutable_samplers; -}; - -struct anv_descriptor_set_layout { - /* Number of bindings in this descriptor set */ - uint16_t binding_count; - - /* Total size of the descriptor set with room for all array entries */ - uint16_t size; - - /* Shader stages affected by this descriptor set */ - uint16_t shader_stages; - - /* Number of buffers in this descriptor set */ - uint16_t buffer_count; - - /* Number of dynamic offsets used by this descriptor set */ - uint16_t dynamic_offset_count; - - /* Bindings in this 
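To make the emit wrappers above concrete: an emit site passes the command name (normally through a GENX-style macro so the per-gen prefix is expanded before the token pasting in __anv_cmd_length() and friends) plus designated initializers for its fields. The command and field here are illustrative only:

   /* Packs one PIPE_CONTROL into the batch: __anv_cmd_header() supplies the
    * fixed opcode/length dwords, the initializers fill in the rest, and the
    * generated _pack() function writes the final dwords at __dst. */
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
                  .CommandStreamerStallEnable = true);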
descriptor set */ - struct anv_descriptor_set_binding_layout binding[0]; -}; - -struct anv_descriptor { - VkDescriptorType type; - - union { - struct { - struct anv_image_view *image_view; - struct anv_sampler *sampler; - }; - - struct anv_buffer_view *buffer_view; - }; -}; - -struct anv_descriptor_set { - const struct anv_descriptor_set_layout *layout; - uint32_t buffer_count; - struct anv_buffer_view *buffer_views; - struct anv_descriptor descriptors[0]; -}; - -VkResult -anv_descriptor_set_create(struct anv_device *device, - const struct anv_descriptor_set_layout *layout, - struct anv_descriptor_set **out_set); - -void -anv_descriptor_set_destroy(struct anv_device *device, - struct anv_descriptor_set *set); - -struct anv_pipeline_binding { - /* The descriptor set this surface corresponds to */ - uint16_t set; - - /* Offset into the descriptor set */ - uint16_t offset; -}; - -struct anv_pipeline_layout { - struct { - struct anv_descriptor_set_layout *layout; - uint32_t dynamic_offset_start; - } set[MAX_SETS]; - - uint32_t num_sets; - - struct { - bool has_dynamic_offsets; - } stage[MESA_SHADER_STAGES]; -}; - -struct anv_buffer { - struct anv_device * device; - VkDeviceSize size; - - VkBufferUsageFlags usage; - - /* Set when bound */ - struct anv_bo * bo; - VkDeviceSize offset; -}; - -enum anv_cmd_dirty_bits { - ANV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1 << 0, /* VK_DYNAMIC_STATE_VIEWPORT */ - ANV_CMD_DIRTY_DYNAMIC_SCISSOR = 1 << 1, /* VK_DYNAMIC_STATE_SCISSOR */ - ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1 << 2, /* VK_DYNAMIC_STATE_LINE_WIDTH */ - ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1 << 3, /* VK_DYNAMIC_STATE_DEPTH_BIAS */ - ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1 << 4, /* VK_DYNAMIC_STATE_BLEND_CONSTANTS */ - ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1 << 5, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS */ - ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, /* VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK */ - ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, /* VK_DYNAMIC_STATE_STENCIL_WRITE_MASK */ - ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8, /* VK_DYNAMIC_STATE_STENCIL_REFERENCE */ - ANV_CMD_DIRTY_DYNAMIC_ALL = (1 << 9) - 1, - ANV_CMD_DIRTY_PIPELINE = 1 << 9, - ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 10, - ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 11, -}; -typedef uint32_t anv_cmd_dirty_mask_t; - -struct anv_vertex_binding { - struct anv_buffer * buffer; - VkDeviceSize offset; -}; - -struct anv_push_constants { - /* Current allocated size of this push constants data structure. - * Because a decent chunk of it may not be used (images on SKL, for - * instance), we won't actually allocate the entire structure up-front. - */ - uint32_t size; - - /* Push constant data provided by the client through vkPushConstants */ - uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE]; - - /* Our hardware only provides zero-based vertex and instance id so, in - * order to satisfy the vulkan requirements, we may have to push one or - * both of these into the shader. 
- */ - uint32_t base_vertex; - uint32_t base_instance; - - /* Offsets and ranges for dynamically bound buffers */ - struct { - uint32_t offset; - uint32_t range; - } dynamic[MAX_DYNAMIC_BUFFERS]; - - /* Image data for image_load_store on pre-SKL */ - struct brw_image_param images[MAX_IMAGES]; -}; - -struct anv_dynamic_state { - struct { - uint32_t count; - VkViewport viewports[MAX_VIEWPORTS]; - } viewport; - - struct { - uint32_t count; - VkRect2D scissors[MAX_SCISSORS]; - } scissor; - - float line_width; - - struct { - float bias; - float clamp; - float slope; - } depth_bias; - - float blend_constants[4]; - - struct { - float min; - float max; - } depth_bounds; - - struct { - uint32_t front; - uint32_t back; - } stencil_compare_mask; - - struct { - uint32_t front; - uint32_t back; - } stencil_write_mask; - - struct { - uint32_t front; - uint32_t back; - } stencil_reference; -}; - -extern const struct anv_dynamic_state default_dynamic_state; - -void anv_dynamic_state_copy(struct anv_dynamic_state *dest, - const struct anv_dynamic_state *src, - uint32_t copy_mask); - -/** - * Attachment state when recording a renderpass instance. - * - * The clear value is valid only if there exists a pending clear. - */ -struct anv_attachment_state { - VkImageAspectFlags pending_clear_aspects; - VkClearValue clear_value; -}; - -/** State required while building cmd buffer */ -struct anv_cmd_state { - /* PIPELINE_SELECT.PipelineSelection */ - uint32_t current_pipeline; - uint32_t current_l3_config; - uint32_t vb_dirty; - anv_cmd_dirty_mask_t dirty; - anv_cmd_dirty_mask_t compute_dirty; - uint32_t num_workgroups_offset; - struct anv_bo *num_workgroups_bo; - VkShaderStageFlags descriptors_dirty; - VkShaderStageFlags push_constants_dirty; - uint32_t scratch_size; - struct anv_pipeline * pipeline; - struct anv_pipeline * compute_pipeline; - struct anv_framebuffer * framebuffer; - struct anv_render_pass * pass; - struct anv_subpass * subpass; - uint32_t restart_index; - struct anv_vertex_binding vertex_bindings[MAX_VBS]; - struct anv_descriptor_set * descriptors[MAX_SETS]; - struct anv_push_constants * push_constants[MESA_SHADER_STAGES]; - struct anv_state binding_tables[MESA_SHADER_STAGES]; - struct anv_state samplers[MESA_SHADER_STAGES]; - struct anv_dynamic_state dynamic; - bool need_query_wa; - - /** - * Array length is anv_cmd_state::pass::attachment_count. Array content is - * valid only when recording a render pass instance. - */ - struct anv_attachment_state * attachments; - - struct { - struct anv_buffer * index_buffer; - uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */ - uint32_t index_offset; - } gen7; -}; - -struct anv_cmd_pool { - VkAllocationCallbacks alloc; - struct list_head cmd_buffers; -}; - -#define ANV_CMD_BUFFER_BATCH_SIZE 8192 - -enum anv_cmd_buffer_exec_mode { - ANV_CMD_BUFFER_EXEC_MODE_PRIMARY, - ANV_CMD_BUFFER_EXEC_MODE_EMIT, - ANV_CMD_BUFFER_EXEC_MODE_CHAIN, - ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN, -}; - -struct anv_cmd_buffer { - VK_LOADER_DATA _loader_data; - - struct anv_device * device; - - struct anv_cmd_pool * pool; - struct list_head pool_link; - - struct anv_batch batch; - - /* Fields required for the actual chain of anv_batch_bo's. - * - * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain(). 
- */ - struct list_head batch_bos; - enum anv_cmd_buffer_exec_mode exec_mode; - - /* A vector of anv_batch_bo pointers for every batch or surface buffer - * referenced by this command buffer - * - * initialized by anv_cmd_buffer_init_batch_bo_chain() - */ - struct anv_vector seen_bbos; - - /* A vector of int32_t's for every block of binding tables. - * - * initialized by anv_cmd_buffer_init_batch_bo_chain() - */ - struct anv_vector bt_blocks; - uint32_t bt_next; - struct anv_reloc_list surface_relocs; - - /* Information needed for execbuf - * - * These fields are generated by anv_cmd_buffer_prepare_execbuf(). - */ - struct { - struct drm_i915_gem_execbuffer2 execbuf; - - struct drm_i915_gem_exec_object2 * objects; - uint32_t bo_count; - struct anv_bo ** bos; - - /* Allocated length of the 'objects' and 'bos' arrays */ - uint32_t array_length; - - bool need_reloc; - } execbuf2; - - /* Serial for tracking buffer completion */ - uint32_t serial; - - /* Stream objects for storing temporary data */ - struct anv_state_stream surface_state_stream; - struct anv_state_stream dynamic_state_stream; - - VkCommandBufferUsageFlags usage_flags; - VkCommandBufferLevel level; - - struct anv_cmd_state state; -}; - -VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); -void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); -void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); -void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer); -void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, - struct anv_cmd_buffer *secondary); -void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer); - -VkResult anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, - unsigned stage, struct anv_state *bt_state); -VkResult anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, - unsigned stage, struct anv_state *state); -uint32_t gen7_cmd_buffer_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer); -void gen7_cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer, - uint32_t stages); - -struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, - const void *data, uint32_t size, uint32_t alignment); -struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, - uint32_t *a, uint32_t *b, - uint32_t dwords, uint32_t alignment); - -struct anv_address -anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer); -struct anv_state -anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, - uint32_t entries, uint32_t *state_offset); -struct anv_state -anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer); -struct anv_state -anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t size, uint32_t alignment); - -VkResult -anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer); - -void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer); -void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer); - -void gen7_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); -void gen75_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); -void gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); -void gen9_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); - -void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); - -void 
anv_cmd_state_setup_attachments(struct anv_cmd_buffer *cmd_buffer, - const VkRenderPassBeginInfo *info); - -void gen7_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass); -void gen75_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass); -void gen8_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass); -void gen9_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass); -void anv_cmd_buffer_set_subpass(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass); - -void gen7_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); -void gen75_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); -void gen8_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); -void gen9_flush_pipeline_select_3d(struct anv_cmd_buffer *cmd_buffer); - -void gen7_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); -void gen75_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); -void gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); -void gen9_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer); - -void gen7_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); -void gen75_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); -void gen8_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); -void gen9_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer); - -struct anv_state -anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage); -struct anv_state -anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer); - -void anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer); -void anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer); - -const struct anv_image_view * -anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer); - -void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); - -struct anv_fence { - struct anv_bo bo; - struct drm_i915_gem_execbuffer2 execbuf; - struct drm_i915_gem_exec_object2 exec2_objects[1]; - bool ready; -}; - -struct anv_event { - uint64_t semaphore; - struct anv_state state; -}; - -struct nir_shader; - -struct anv_shader_module { - struct nir_shader * nir; - - unsigned char sha1[20]; - uint32_t size; - char data[0]; -}; - -void anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, - struct anv_shader_module *module, - const char *entrypoint, - const VkSpecializationInfo *spec_info); - -static inline gl_shader_stage -vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage) -{ - assert(__builtin_popcount(vk_stage) == 1); - return ffs(vk_stage) - 1; -} - -static inline VkShaderStageFlagBits -mesa_to_vk_shader_stage(gl_shader_stage mesa_stage) -{ - return (1 << mesa_stage); -} - -#define ANV_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1) - -#define anv_foreach_stage(stage, stage_bits) \ - for (gl_shader_stage stage, \ - __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK); \ - stage = __builtin_ffs(__tmp) - 1, __tmp; \ - __tmp &= ~(1 << (stage))) - -struct anv_pipeline_bind_map { - uint32_t surface_count; - uint32_t sampler_count; - uint32_t image_count; - - struct anv_pipeline_binding * surface_to_descriptor; - struct anv_pipeline_binding * sampler_to_descriptor; -}; - -struct anv_pipeline { - struct anv_device * device; - struct anv_batch batch; - uint32_t batch_data[512]; - struct anv_reloc_list batch_relocs; - uint32_t 
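For reference, the helpers above map between VkShaderStageFlagBits (one bit per stage) and Mesa's gl_shader_stage enum, and anv_foreach_stage() visits every stage set in a mask, lowest bit first:

   VkShaderStageFlags mask = VK_SHADER_STAGE_VERTEX_BIT |
                             VK_SHADER_STAGE_FRAGMENT_BIT;

   anv_foreach_stage(s, mask) {
      /* Visits s == MESA_SHADER_VERTEX, then s == MESA_SHADER_FRAGMENT. */
   }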
dynamic_state_mask; - struct anv_dynamic_state dynamic_state; - - struct anv_pipeline_layout * layout; - struct anv_pipeline_bind_map bindings[MESA_SHADER_STAGES]; - - bool use_repclear; - - struct brw_vs_prog_data vs_prog_data; - struct brw_wm_prog_data wm_prog_data; - struct brw_gs_prog_data gs_prog_data; - struct brw_cs_prog_data cs_prog_data; - bool writes_point_size; - struct brw_stage_prog_data * prog_data[MESA_SHADER_STAGES]; - uint32_t scratch_start[MESA_SHADER_STAGES]; - uint32_t total_scratch; - struct { - uint32_t vs_start; - uint32_t vs_size; - uint32_t nr_vs_entries; - uint32_t gs_start; - uint32_t gs_size; - uint32_t nr_gs_entries; - } urb; - - VkShaderStageFlags active_stages; - struct anv_state blend_state; - uint32_t vs_simd8; - uint32_t vs_vec4; - uint32_t ps_simd8; - uint32_t ps_simd16; - uint32_t ps_ksp0; - uint32_t ps_ksp2; - uint32_t ps_grf_start0; - uint32_t ps_grf_start2; - uint32_t gs_kernel; - uint32_t cs_simd; - - uint32_t vb_used; - uint32_t binding_stride[MAX_VBS]; - bool instancing_enable[MAX_VBS]; - bool primitive_restart; - uint32_t topology; - - uint32_t cs_thread_width_max; - uint32_t cs_right_mask; - - struct { - uint32_t sf[7]; - uint32_t depth_stencil_state[3]; - } gen7; - - struct { - uint32_t sf[4]; - uint32_t raster[5]; - uint32_t wm_depth_stencil[3]; - } gen8; - - struct { - uint32_t wm_depth_stencil[4]; - } gen9; -}; - -struct anv_graphics_pipeline_create_info { - /** - * If non-negative, overrides the color attachment count of the pipeline's - * subpass. - */ - int8_t color_attachment_count; - - bool use_repclear; - bool disable_viewport; - bool disable_scissor; - bool disable_vs; - bool use_rectlist; -}; - -VkResult -anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *alloc); - -VkResult -anv_pipeline_compile_cs(struct anv_pipeline *pipeline, - struct anv_pipeline_cache *cache, - const VkComputePipelineCreateInfo *info, - struct anv_shader_module *module, - const char *entrypoint, - const VkSpecializationInfo *spec_info); - -VkResult -anv_graphics_pipeline_create(VkDevice device, - VkPipelineCache cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); - -VkResult -gen7_graphics_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); - -VkResult -gen75_graphics_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); - -VkResult -gen8_graphics_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); -VkResult -gen9_graphics_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); -VkResult 
-gen7_compute_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkComputePipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); -VkResult -gen75_compute_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkComputePipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); - -VkResult -gen8_compute_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkComputePipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); -VkResult -gen9_compute_pipeline_create(VkDevice _device, - struct anv_pipeline_cache *cache, - const VkComputePipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *alloc, - VkPipeline *pPipeline); - -struct anv_format_swizzle { - unsigned r:2; - unsigned g:2; - unsigned b:2; - unsigned a:2; -}; - -struct anv_format { - const VkFormat vk_format; - const char *name; - enum isl_format isl_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ - const struct isl_format_layout *isl_layout; - struct anv_format_swizzle swizzle; - bool has_depth; - bool has_stencil; -}; - -const struct anv_format * -anv_format_for_vk_format(VkFormat format); - -enum isl_format -anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect, - VkImageTiling tiling, struct anv_format_swizzle *swizzle); - -static inline bool -anv_format_is_color(const struct anv_format *format) -{ - return !format->has_depth && !format->has_stencil; -} - -static inline bool -anv_format_is_depth_or_stencil(const struct anv_format *format) -{ - return format->has_depth || format->has_stencil; -} - -/** - * Subsurface of an anv_image. - */ -struct anv_surface { - struct isl_surf isl; - - /** - * Offset from VkImage's base address, as bound by vkBindImageMemory(). - */ - uint32_t offset; -}; - -struct anv_image { - VkImageType type; - /* The original VkFormat provided by the client. This may not match any - * of the actual surface formats. - */ - VkFormat vk_format; - const struct anv_format *format; - VkExtent3D extent; - uint32_t levels; - uint32_t array_size; - uint32_t samples; /**< VkImageCreateInfo::samples */ - VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */ - VkImageTiling tiling; /** VkImageCreateInfo::tiling */ - - VkDeviceSize size; - uint32_t alignment; - - /* Set when bound */ - struct anv_bo *bo; - VkDeviceSize offset; - - /** - * Image subsurfaces - * - * For each foo, anv_image::foo_surface is valid if and only if - * anv_image::format has a foo aspect. - * - * The hardware requires that the depth buffer and stencil buffer be - * separate surfaces. From Vulkan's perspective, though, depth and stencil - * reside in the same VkImage. To satisfy both the hardware and Vulkan, we - * allocate the depth and stencil buffers as separate surfaces in the same - * bo. - */ - union { - struct anv_surface color_surface; - - struct { - struct anv_surface depth_surface; - struct anv_surface stencil_surface; - }; - }; -}; - -struct anv_image_view { - const struct anv_image *image; /**< VkImageViewCreateInfo::image */ - struct anv_bo *bo; - uint32_t offset; /**< Offset into bo. */ - - VkImageAspectFlags aspect_mask; - VkFormat vk_format; - VkComponentMapping swizzle; - enum isl_format format; - uint32_t base_layer; - uint32_t base_mip; - VkExtent3D level_0_extent; /**< Extent of ::image's level 0 adjusted for ::vk_format. */ - VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. 
*/ - - /** RENDER_SURFACE_STATE when using image as a color render target. */ - struct anv_state color_rt_surface_state; - - /** RENDER_SURFACE_STATE when using image as a sampler surface. */ - struct anv_state sampler_surface_state; - - /** RENDER_SURFACE_STATE when using image as a storage image. */ - struct anv_state storage_surface_state; -}; - -struct anv_image_create_info { - const VkImageCreateInfo *vk_info; - isl_tiling_flags_t isl_tiling_flags; - uint32_t stride; -}; - -VkResult anv_image_create(VkDevice _device, - const struct anv_image_create_info *info, - const VkAllocationCallbacks* alloc, - VkImage *pImage); - -struct anv_surface * -anv_image_get_surface_for_aspect_mask(struct anv_image *image, - VkImageAspectFlags aspect_mask); - -void anv_image_view_init(struct anv_image_view *view, - struct anv_device *device, - const VkImageViewCreateInfo* pCreateInfo, - struct anv_cmd_buffer *cmd_buffer, - uint32_t offset); - -void -anv_fill_image_surface_state(struct anv_device *device, struct anv_state state, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage); -void -gen7_fill_image_surface_state(struct anv_device *device, void *state_map, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage); -void -gen75_fill_image_surface_state(struct anv_device *device, void *state_map, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage); -void -gen8_fill_image_surface_state(struct anv_device *device, void *state_map, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage); -void -gen9_fill_image_surface_state(struct anv_device *device, void *state_map, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage); - -struct anv_buffer_view { - enum isl_format format; /**< VkBufferViewCreateInfo::format */ - struct anv_bo *bo; - uint32_t offset; /**< Offset into bo. 
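Because depth and stencil are separate hardware surfaces inside a single VkImage (see the union in struct anv_image above), code that needs one sub-surface goes through the aspect lookup declared above; a sketch:

   struct anv_surface *surface =
      anv_image_get_surface_for_aspect_mask(image, VK_IMAGE_ASPECT_DEPTH_BIT);

   /* surface->offset is relative to the VkImage's base address as bound by
    * vkBindImageMemory(), so the GPU address of this sub-surface is
    * image->bo's base + image->offset + surface->offset. */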
*/ - uint64_t range; /**< VkBufferViewCreateInfo::range */ - - struct anv_state surface_state; - struct anv_state storage_surface_state; -}; - -const struct anv_format * -anv_format_for_descriptor_type(VkDescriptorType type); - -void anv_fill_buffer_surface_state(struct anv_device *device, - struct anv_state state, - enum isl_format format, - uint32_t offset, uint32_t range, - uint32_t stride); - -void gen7_fill_buffer_surface_state(void *state, enum isl_format format, - uint32_t offset, uint32_t range, - uint32_t stride); -void gen75_fill_buffer_surface_state(void *state, enum isl_format format, - uint32_t offset, uint32_t range, - uint32_t stride); -void gen8_fill_buffer_surface_state(void *state, enum isl_format format, - uint32_t offset, uint32_t range, - uint32_t stride); -void gen9_fill_buffer_surface_state(void *state, enum isl_format format, - uint32_t offset, uint32_t range, - uint32_t stride); - -void anv_image_view_fill_image_param(struct anv_device *device, - struct anv_image_view *view, - struct brw_image_param *param); -void anv_buffer_view_fill_image_param(struct anv_device *device, - struct anv_buffer_view *view, - struct brw_image_param *param); - -struct anv_sampler { - uint32_t state[4]; -}; - -struct anv_framebuffer { - uint32_t width; - uint32_t height; - uint32_t layers; - - uint32_t attachment_count; - struct anv_image_view * attachments[0]; -}; - -struct anv_subpass { - uint32_t input_count; - uint32_t * input_attachments; - uint32_t color_count; - uint32_t * color_attachments; - uint32_t * resolve_attachments; - uint32_t depth_stencil_attachment; - - /** Subpass has at least one resolve attachment */ - bool has_resolve; -}; - -struct anv_render_pass_attachment { - const struct anv_format *format; - uint32_t samples; - VkAttachmentLoadOp load_op; - VkAttachmentLoadOp stencil_load_op; -}; - -struct anv_render_pass { - uint32_t attachment_count; - uint32_t subpass_count; - uint32_t * subpass_attachments; - struct anv_render_pass_attachment * attachments; - struct anv_subpass subpasses[0]; -}; - -extern struct anv_render_pass anv_meta_dummy_renderpass; - -struct anv_query_pool_slot { - uint64_t begin; - uint64_t end; - uint64_t available; -}; - -struct anv_query_pool { - VkQueryType type; - uint32_t slots; - struct anv_bo bo; -}; - -VkResult anv_device_init_meta(struct anv_device *device); -void anv_device_finish_meta(struct anv_device *device); - -void *anv_lookup_entrypoint(const char *name); - -void anv_dump_image_to_ppm(struct anv_device *device, - struct anv_image *image, unsigned miplevel, - unsigned array_layer, const char *filename); - -#define ANV_DEFINE_HANDLE_CASTS(__anv_type, __VkType) \ - \ - static inline struct __anv_type * \ - __anv_type ## _from_handle(__VkType _handle) \ - { \ - return (struct __anv_type *) _handle; \ - } \ - \ - static inline __VkType \ - __anv_type ## _to_handle(struct __anv_type *_obj) \ - { \ - return (__VkType) _obj; \ - } - -#define ANV_DEFINE_NONDISP_HANDLE_CASTS(__anv_type, __VkType) \ - \ - static inline struct __anv_type * \ - __anv_type ## _from_handle(__VkType _handle) \ - { \ - return (struct __anv_type *)(uintptr_t) _handle; \ - } \ - \ - static inline __VkType \ - __anv_type ## _to_handle(struct __anv_type *_obj) \ - { \ - return (__VkType)(uintptr_t) _obj; \ - } - -#define ANV_FROM_HANDLE(__anv_type, __name, __handle) \ - struct __anv_type *__name = __anv_type ## _from_handle(__handle) - -ANV_DEFINE_HANDLE_CASTS(anv_cmd_buffer, VkCommandBuffer) -ANV_DEFINE_HANDLE_CASTS(anv_device, VkDevice) 
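These cast generators back the ANV_FROM_HANDLE() pattern used in every entry point: dispatchable handles (instance, device, queue, command buffer) are plain pointers, while non-dispatchable ones round-trip through uintptr_t. An entry point taking a VkDevice and a VkQueryPool, for example, recovers the driver structs with:

   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_query_pool, pool, _pool);

   /* Equivalent to:
    *   struct anv_device *device = anv_device_from_handle(_device);
    *   struct anv_query_pool *pool = anv_query_pool_from_handle(_pool);
    * as seen in anv_DestroyQueryPool() below. */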
-ANV_DEFINE_HANDLE_CASTS(anv_instance, VkInstance) -ANV_DEFINE_HANDLE_CASTS(anv_physical_device, VkPhysicalDevice) -ANV_DEFINE_HANDLE_CASTS(anv_queue, VkQueue) - -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, VkCommandPool) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, VkBuffer) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, VkBufferView) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, VkDeviceMemory) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, VkFence) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_event, VkEvent) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, VkFramebuffer) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image, VkImage) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, VkImageView); -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_cache, VkPipelineCache) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, VkPipeline) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, VkPipelineLayout) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, VkQueryPool) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_render_pass, VkRenderPass) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, VkSampler) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_shader_module, VkShaderModule) - -#define ANV_DEFINE_STRUCT_CASTS(__anv_type, __VkType) \ - \ - static inline const __VkType * \ - __anv_type ## _to_ ## __VkType(const struct __anv_type *__anv_obj) \ - { \ - return (const __VkType *) __anv_obj; \ - } - -#define ANV_COMMON_TO_STRUCT(__VkType, __vk_name, __common_name) \ - const __VkType *__vk_name = anv_common_to_ ## __VkType(__common_name) - -ANV_DEFINE_STRUCT_CASTS(anv_common, VkMemoryBarrier) -ANV_DEFINE_STRUCT_CASTS(anv_common, VkBufferMemoryBarrier) -ANV_DEFINE_STRUCT_CASTS(anv_common, VkImageMemoryBarrier) - -#ifdef __cplusplus -} -#endif diff --git a/src/vulkan/anv_query.c b/src/vulkan/anv_query.c deleted file mode 100644 index e45b519..0000000 --- a/src/vulkan/anv_query.c +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -VkResult anv_CreateQueryPool( - VkDevice _device, - const VkQueryPoolCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkQueryPool* pQueryPool) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_query_pool *pool; - VkResult result; - uint32_t slot_size; - uint64_t size; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO); - - switch (pCreateInfo->queryType) { - case VK_QUERY_TYPE_OCCLUSION: - case VK_QUERY_TYPE_TIMESTAMP: - break; - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - return VK_ERROR_INCOMPATIBLE_DRIVER; - default: - assert(!"Invalid query type"); - } - - slot_size = sizeof(struct anv_query_pool_slot); - pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pool == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - pool->type = pCreateInfo->queryType; - pool->slots = pCreateInfo->queryCount; - - size = pCreateInfo->queryCount * slot_size; - result = anv_bo_init_new(&pool->bo, device, size); - if (result != VK_SUCCESS) - goto fail; - - pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size, 0); - - *pQueryPool = anv_query_pool_to_handle(pool); - - return VK_SUCCESS; - - fail: - anv_free2(&device->alloc, pAllocator, pool); - - return result; -} - -void anv_DestroyQueryPool( - VkDevice _device, - VkQueryPool _pool, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_query_pool, pool, _pool); - - anv_gem_munmap(pool->bo.map, pool->bo.size); - anv_gem_close(device, pool->bo.gem_handle); - anv_free2(&device->alloc, pAllocator, pool); -} - -VkResult anv_GetQueryPoolResults( - VkDevice _device, - VkQueryPool queryPool, - uint32_t firstQuery, - uint32_t queryCount, - size_t dataSize, - void* pData, - VkDeviceSize stride, - VkQueryResultFlags flags) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - int64_t timeout = INT64_MAX; - uint64_t result; - int ret; - - assert(pool->type == VK_QUERY_TYPE_OCCLUSION || - pool->type == VK_QUERY_TYPE_TIMESTAMP); - - if (pData == NULL) - return VK_SUCCESS; - - if (flags & VK_QUERY_RESULT_WAIT_BIT) { - ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout); - if (ret == -1) { - /* We don't know the real error. 
*/ - return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, - "gem_wait failed %m"); - } - } - - void *data_end = pData + dataSize; - struct anv_query_pool_slot *slot = pool->bo.map; - - for (uint32_t i = 0; i < queryCount; i++) { - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: { - result = slot[firstQuery + i].end - slot[firstQuery + i].begin; - break; - } - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - unreachable("pipeline stats not supported"); - case VK_QUERY_TYPE_TIMESTAMP: { - result = slot[firstQuery + i].begin; - break; - } - default: - unreachable("invalid pool type"); - } - - if (flags & VK_QUERY_RESULT_64_BIT) { - uint64_t *dst = pData; - dst[0] = result; - if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) - dst[1] = slot[firstQuery + i].available; - } else { - uint32_t *dst = pData; - if (result > UINT32_MAX) - result = UINT32_MAX; - dst[0] = result; - if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) - dst[1] = slot[firstQuery + i].available; - } - - pData += stride; - if (pData >= data_end) - break; - } - - return VK_SUCCESS; -} - -void anv_CmdResetQueryPool( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t firstQuery, - uint32_t queryCount) -{ - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - - for (uint32_t i = 0; i < queryCount; i++) { - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - case VK_QUERY_TYPE_TIMESTAMP: { - struct anv_query_pool_slot *slot = pool->bo.map; - slot[firstQuery + i].available = 0; - break; - } - default: - assert(!"Invalid query type"); - } - } -} diff --git a/src/vulkan/anv_util.c b/src/vulkan/anv_util.c deleted file mode 100644 index 22fd01c..0000000 --- a/src/vulkan/anv_util.c +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include - -#include "anv_private.h" - -/** Log an error message. */ -void anv_printflike(1, 2) -anv_loge(const char *format, ...) -{ - va_list va; - - va_start(va, format); - anv_loge_v(format, va); - va_end(va); -} - -/** \see anv_loge() */ -void -anv_loge_v(const char *format, va_list va) -{ - fprintf(stderr, "vk: error: "); - vfprintf(stderr, format, va); - fprintf(stderr, "\n"); -} - -void anv_printflike(3, 4) -__anv_finishme(const char *file, int line, const char *format, ...) 
-{ - va_list ap; - char buffer[256]; - - va_start(ap, format); - vsnprintf(buffer, sizeof(buffer), format, ap); - va_end(ap); - - fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer); -} - -void anv_noreturn anv_printflike(1, 2) -anv_abortf(const char *format, ...) -{ - va_list va; - - va_start(va, format); - anv_abortfv(format, va); - va_end(va); -} - -void anv_noreturn -anv_abortfv(const char *format, va_list va) -{ - fprintf(stderr, "vk: error: "); - vfprintf(stderr, format, va); - fprintf(stderr, "\n"); - abort(); -} - -VkResult -__vk_errorf(VkResult error, const char *file, int line, const char *format, ...) -{ - va_list ap; - char buffer[256]; - -#define ERROR_CASE(error) case error: error_str = #error; break; - - const char *error_str; - switch ((int32_t)error) { - - /* Core errors */ - ERROR_CASE(VK_ERROR_OUT_OF_HOST_MEMORY) - ERROR_CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY) - ERROR_CASE(VK_ERROR_INITIALIZATION_FAILED) - ERROR_CASE(VK_ERROR_DEVICE_LOST) - ERROR_CASE(VK_ERROR_MEMORY_MAP_FAILED) - ERROR_CASE(VK_ERROR_LAYER_NOT_PRESENT) - ERROR_CASE(VK_ERROR_EXTENSION_NOT_PRESENT) - ERROR_CASE(VK_ERROR_INCOMPATIBLE_DRIVER) - - /* Extension errors */ - ERROR_CASE(VK_ERROR_OUT_OF_DATE_KHR) - - default: - assert(!"Unknown error"); - error_str = "unknown error"; - } - -#undef ERROR_CASE - - if (format) { - va_start(ap, format); - vsnprintf(buffer, sizeof(buffer), format, ap); - va_end(ap); - - fprintf(stderr, "%s:%d: %s (%s)\n", file, line, buffer, error_str); - } else { - fprintf(stderr, "%s:%d: %s\n", file, line, error_str); - } - - return error; -} - -int -anv_vector_init(struct anv_vector *vector, uint32_t element_size, uint32_t size) -{ - assert(util_is_power_of_two(size)); - assert(element_size < size && util_is_power_of_two(element_size)); - - vector->head = 0; - vector->tail = 0; - vector->element_size = element_size; - vector->size = size; - vector->data = malloc(size); - - return vector->data != NULL; -} - -void * -anv_vector_add(struct anv_vector *vector) -{ - uint32_t offset, size, split, tail; - void *data; - - if (vector->head - vector->tail == vector->size) { - size = vector->size * 2; - data = malloc(size); - if (data == NULL) - return NULL; - split = align_u32(vector->tail, vector->size); - tail = vector->tail & (vector->size - 1); - if (vector->head - split < vector->size) { - memcpy(data + tail, - vector->data + tail, - split - vector->tail); - memcpy(data + vector->size, - vector->data, vector->head - split); - } else { - memcpy(data + tail, - vector->data + tail, - vector->head - vector->tail); - } - free(vector->data); - vector->data = data; - vector->size = size; - } - - assert(vector->head - vector->tail < vector->size); - - offset = vector->head & (vector->size - 1); - vector->head += vector->element_size; - - return vector->data + offset; -} - -void * -anv_vector_remove(struct anv_vector *vector) -{ - uint32_t offset; - - if (vector->head == vector->tail) - return NULL; - - assert(vector->head - vector->tail <= vector->size); - - offset = vector->tail & (vector->size - 1); - vector->tail += vector->element_size; - - return vector->data + offset; -} diff --git a/src/vulkan/anv_wsi.c b/src/vulkan/anv_wsi.c deleted file mode 100644 index c5911a3..0000000 --- a/src/vulkan/anv_wsi.c +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, 
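anv_vector_add()/anv_vector_remove() above implement a growable FIFO ring buffer: head and tail are monotonically increasing byte offsets masked by the power-of-two size on access, and the backing storage doubles when full. A usage sketch (sizes are illustrative; both must be powers of two):

   struct anv_vector fifo;
   anv_vector_init(&fifo, sizeof(uint32_t), 4096);

   uint32_t *slot = anv_vector_add(&fifo);      /* may reallocate; NULL on OOM */
   if (slot)
      *slot = 42;

   uint32_t *oldest = anv_vector_remove(&fifo); /* FIFO order; NULL when empty */

   anv_vector_finish(&fifo);                    /* frees the backing storage */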
including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "anv_wsi.h" - -VkResult -anv_init_wsi(struct anv_instance *instance) -{ - VkResult result; - - result = anv_x11_init_wsi(instance); - if (result != VK_SUCCESS) - return result; - -#ifdef HAVE_WAYLAND_PLATFORM - result = anv_wl_init_wsi(instance); - if (result != VK_SUCCESS) { - anv_x11_finish_wsi(instance); - return result; - } -#endif - - return VK_SUCCESS; -} - -void -anv_finish_wsi(struct anv_instance *instance) -{ -#ifdef HAVE_WAYLAND_PLATFORM - anv_wl_finish_wsi(instance); -#endif - anv_x11_finish_wsi(instance); -} - -void anv_DestroySurfaceKHR( - VkInstance _instance, - VkSurfaceKHR _surface, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_instance, instance, _instance); - ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); - - anv_free2(&instance->alloc, pAllocator, surface); -} - -VkResult anv_GetPhysicalDeviceSurfaceSupportKHR( - VkPhysicalDevice physicalDevice, - uint32_t queueFamilyIndex, - VkSurfaceKHR _surface, - VkBool32* pSupported) -{ - ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); - ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); - struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; - - return iface->get_support(surface, device, queueFamilyIndex, pSupported); -} - -VkResult anv_GetPhysicalDeviceSurfaceCapabilitiesKHR( - VkPhysicalDevice physicalDevice, - VkSurfaceKHR _surface, - VkSurfaceCapabilitiesKHR* pSurfaceCapabilities) -{ - ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); - ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); - struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; - - return iface->get_capabilities(surface, device, pSurfaceCapabilities); -} - -VkResult anv_GetPhysicalDeviceSurfaceFormatsKHR( - VkPhysicalDevice physicalDevice, - VkSurfaceKHR _surface, - uint32_t* pSurfaceFormatCount, - VkSurfaceFormatKHR* pSurfaceFormats) -{ - ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); - ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); - struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; - - return iface->get_formats(surface, device, pSurfaceFormatCount, - pSurfaceFormats); -} - -VkResult anv_GetPhysicalDeviceSurfacePresentModesKHR( - VkPhysicalDevice physicalDevice, - VkSurfaceKHR _surface, - uint32_t* pPresentModeCount, - VkPresentModeKHR* pPresentModes) -{ - ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); - ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); - struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; - - return 
iface->get_present_modes(surface, device, pPresentModeCount, - pPresentModes); -} - -VkResult anv_CreateSwapchainKHR( - VkDevice _device, - const VkSwapchainCreateInfoKHR* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSwapchainKHR* pSwapchain) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, pCreateInfo->surface); - struct anv_wsi_interface *iface = device->instance->wsi[surface->platform]; - struct anv_swapchain *swapchain; - - VkResult result = iface->create_swapchain(surface, device, pCreateInfo, - pAllocator, &swapchain); - if (result != VK_SUCCESS) - return result; - - *pSwapchain = anv_swapchain_to_handle(swapchain); - - return VK_SUCCESS; -} - -void anv_DestroySwapchainKHR( - VkDevice device, - VkSwapchainKHR _swapchain, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); - - swapchain->destroy(swapchain, pAllocator); -} - -VkResult anv_GetSwapchainImagesKHR( - VkDevice device, - VkSwapchainKHR _swapchain, - uint32_t* pSwapchainImageCount, - VkImage* pSwapchainImages) -{ - ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); - - return swapchain->get_images(swapchain, pSwapchainImageCount, - pSwapchainImages); -} - -VkResult anv_AcquireNextImageKHR( - VkDevice device, - VkSwapchainKHR _swapchain, - uint64_t timeout, - VkSemaphore semaphore, - VkFence fence, - uint32_t* pImageIndex) -{ - ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain); - - return swapchain->acquire_next_image(swapchain, timeout, semaphore, - pImageIndex); -} - -VkResult anv_QueuePresentKHR( - VkQueue _queue, - const VkPresentInfoKHR* pPresentInfo) -{ - ANV_FROM_HANDLE(anv_queue, queue, _queue); - VkResult result; - - for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { - ANV_FROM_HANDLE(anv_swapchain, swapchain, pPresentInfo->pSwapchains[i]); - - assert(swapchain->device == queue->device); - - result = swapchain->queue_present(swapchain, queue, - pPresentInfo->pImageIndices[i]); - /* TODO: What if one of them returns OUT_OF_DATE? */ - if (result != VK_SUCCESS) - return result; - } - - return VK_SUCCESS; -} diff --git a/src/vulkan/anv_wsi.h b/src/vulkan/anv_wsi.h deleted file mode 100644 index 6e9ff9b..0000000 --- a/src/vulkan/anv_wsi.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
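All of the surface entry points above share one dispatch shape: the VkSurfaceKHR is really a VkIcdSurfaceBase whose platform field indexes the instance's wsi[] table, and the call is forwarded to that platform's anv_wsi_interface (defined in anv_wsi.h, which follows). A backend fills in its slot at init time; sketched here for an assumed X11 backend with hypothetical callback names:

   /* Inside a hypothetical anv_x11_init_wsi(), simplified: */
   struct anv_wsi_interface *iface =
      anv_alloc(&instance->alloc, sizeof(*iface), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);

   iface->get_support = x11_surface_get_support;           /* hypothetical */
   iface->get_capabilities = x11_surface_get_capabilities; /* hypothetical */
   /* ... remaining callbacks ... */

   instance->wsi[VK_ICD_WSI_PLATFORM_XCB] = iface;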
- */ - -#pragma once - -#include "anv_private.h" - -struct anv_swapchain; - -struct anv_wsi_interface { - VkResult (*get_support)(VkIcdSurfaceBase *surface, - struct anv_physical_device *device, - uint32_t queueFamilyIndex, - VkBool32* pSupported); - VkResult (*get_capabilities)(VkIcdSurfaceBase *surface, - struct anv_physical_device *device, - VkSurfaceCapabilitiesKHR* pSurfaceCapabilities); - VkResult (*get_formats)(VkIcdSurfaceBase *surface, - struct anv_physical_device *device, - uint32_t* pSurfaceFormatCount, - VkSurfaceFormatKHR* pSurfaceFormats); - VkResult (*get_present_modes)(VkIcdSurfaceBase *surface, - struct anv_physical_device *device, - uint32_t* pPresentModeCount, - VkPresentModeKHR* pPresentModes); - VkResult (*create_swapchain)(VkIcdSurfaceBase *surface, - struct anv_device *device, - const VkSwapchainCreateInfoKHR* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - struct anv_swapchain **swapchain); -}; - -struct anv_swapchain { - struct anv_device *device; - - VkResult (*destroy)(struct anv_swapchain *swapchain, - const VkAllocationCallbacks *pAllocator); - VkResult (*get_images)(struct anv_swapchain *swapchain, - uint32_t *pCount, VkImage *pSwapchainImages); - VkResult (*acquire_next_image)(struct anv_swapchain *swap_chain, - uint64_t timeout, VkSemaphore semaphore, - uint32_t *image_index); - VkResult (*queue_present)(struct anv_swapchain *swap_chain, - struct anv_queue *queue, - uint32_t image_index); -}; - -ANV_DEFINE_NONDISP_HANDLE_CASTS(_VkIcdSurfaceBase, VkSurfaceKHR) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_swapchain, VkSwapchainKHR) - -VkResult anv_x11_init_wsi(struct anv_instance *instance); -void anv_x11_finish_wsi(struct anv_instance *instance); -VkResult anv_wl_init_wsi(struct anv_instance *instance); -void anv_wl_finish_wsi(struct anv_instance *instance); diff --git a/src/vulkan/anv_wsi_wayland.c b/src/vulkan/anv_wsi_wayland.c deleted file mode 100644 index 6f25eaf..0000000 --- a/src/vulkan/anv_wsi_wayland.c +++ /dev/null @@ -1,871 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include -#include - -#include "anv_wsi.h" - -#include - -#define MIN_NUM_IMAGES 2 - -struct wsi_wl_display { - struct wl_display * display; - struct wl_drm * drm; - - /* Vector of VkFormats supported */ - struct anv_vector formats; - - uint32_t capabilities; -}; - -struct wsi_wayland { - struct anv_wsi_interface base; - - struct anv_instance * instance; - - pthread_mutex_t mutex; - /* Hash table of wl_display -> wsi_wl_display mappings */ - struct hash_table * displays; -}; - -static void -wsi_wl_display_add_vk_format(struct wsi_wl_display *display, VkFormat format) -{ - /* Don't add a format that's already in the list */ - VkFormat *f; - anv_vector_foreach(f, &display->formats) - if (*f == format) - return; - - /* Don't add formats which aren't supported by the driver */ - if (anv_format_for_vk_format(format)->isl_format == - ISL_FORMAT_UNSUPPORTED) { - return; - } - - f = anv_vector_add(&display->formats); - if (f) - *f = format; -} - -static void -drm_handle_device(void *data, struct wl_drm *drm, const char *name) -{ - fprintf(stderr, "wl_drm.device(%s)\n", name); -} - -static uint32_t -wl_drm_format_for_vk_format(VkFormat vk_format, bool alpha) -{ - switch (vk_format) { - /* TODO: Figure out what all the formats mean and make this table - * correct. - */ -#if 0 - case VK_FORMAT_R4G4B4A4_UNORM: - return alpha ? WL_DRM_FORMAT_ABGR4444 : WL_DRM_FORMAT_XBGR4444; - case VK_FORMAT_R5G6B5_UNORM: - return WL_DRM_FORMAT_BGR565; - case VK_FORMAT_R5G5B5A1_UNORM: - return alpha ? WL_DRM_FORMAT_ABGR1555 : WL_DRM_FORMAT_XBGR1555; - case VK_FORMAT_R8G8B8_UNORM: - return WL_DRM_FORMAT_XBGR8888; - case VK_FORMAT_R8G8B8A8_UNORM: - return alpha ? WL_DRM_FORMAT_ABGR8888 : WL_DRM_FORMAT_XBGR8888; - case VK_FORMAT_R10G10B10A2_UNORM: - return alpha ? WL_DRM_FORMAT_ABGR2101010 : WL_DRM_FORMAT_XBGR2101010; - case VK_FORMAT_B4G4R4A4_UNORM: - return alpha ? WL_DRM_FORMAT_ARGB4444 : WL_DRM_FORMAT_XRGB4444; - case VK_FORMAT_B5G6R5_UNORM: - return WL_DRM_FORMAT_RGB565; - case VK_FORMAT_B5G5R5A1_UNORM: - return alpha ? WL_DRM_FORMAT_XRGB1555 : WL_DRM_FORMAT_XRGB1555; -#endif - case VK_FORMAT_B8G8R8_SRGB: - return WL_DRM_FORMAT_BGRX8888; - case VK_FORMAT_B8G8R8A8_SRGB: - return alpha ? WL_DRM_FORMAT_ARGB8888 : WL_DRM_FORMAT_XRGB8888; -#if 0 - case VK_FORMAT_B10G10R10A2_UNORM: - return alpha ? 
WL_DRM_FORMAT_ARGB2101010 : WL_DRM_FORMAT_XRGB2101010; -#endif - - default: - assert("!Unsupported Vulkan format"); - return 0; - } -} - -static void -drm_handle_format(void *data, struct wl_drm *drm, uint32_t wl_format) -{ - struct wsi_wl_display *display = data; - - switch (wl_format) { -#if 0 - case WL_DRM_FORMAT_ABGR4444: - case WL_DRM_FORMAT_XBGR4444: - wsi_wl_display_add_vk_format(display, VK_FORMAT_R4G4B4A4_UNORM); - break; - case WL_DRM_FORMAT_BGR565: - wsi_wl_display_add_vk_format(display, VK_FORMAT_R5G6B5_UNORM); - break; - case WL_DRM_FORMAT_ABGR1555: - case WL_DRM_FORMAT_XBGR1555: - wsi_wl_display_add_vk_format(display, VK_FORMAT_R5G5B5A1_UNORM); - break; - case WL_DRM_FORMAT_XBGR8888: - wsi_wl_display_add_vk_format(display, VK_FORMAT_R8G8B8_UNORM); - /* fallthrough */ - case WL_DRM_FORMAT_ABGR8888: - wsi_wl_display_add_vk_format(display, VK_FORMAT_R8G8B8A8_UNORM); - break; - case WL_DRM_FORMAT_ABGR2101010: - case WL_DRM_FORMAT_XBGR2101010: - wsi_wl_display_add_vk_format(display, VK_FORMAT_R10G10B10A2_UNORM); - break; - case WL_DRM_FORMAT_ARGB4444: - case WL_DRM_FORMAT_XRGB4444: - wsi_wl_display_add_vk_format(display, VK_FORMAT_B4G4R4A4_UNORM); - break; - case WL_DRM_FORMAT_RGB565: - wsi_wl_display_add_vk_format(display, VK_FORMAT_B5G6R5_UNORM); - break; - case WL_DRM_FORMAT_ARGB1555: - case WL_DRM_FORMAT_XRGB1555: - wsi_wl_display_add_vk_format(display, VK_FORMAT_B5G5R5A1_UNORM); - break; -#endif - case WL_DRM_FORMAT_XRGB8888: - wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8_SRGB); - /* fallthrough */ - case WL_DRM_FORMAT_ARGB8888: - wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8A8_SRGB); - break; -#if 0 - case WL_DRM_FORMAT_ARGB2101010: - case WL_DRM_FORMAT_XRGB2101010: - wsi_wl_display_add_vk_format(display, VK_FORMAT_B10G10R10A2_UNORM); - break; -#endif - } -} - -static void -drm_handle_authenticated(void *data, struct wl_drm *drm) -{ -} - -static void -drm_handle_capabilities(void *data, struct wl_drm *drm, uint32_t capabilities) -{ - struct wsi_wl_display *display = data; - - display->capabilities = capabilities; -} - -static const struct wl_drm_listener drm_listener = { - drm_handle_device, - drm_handle_format, - drm_handle_authenticated, - drm_handle_capabilities, -}; - -static void -registry_handle_global(void *data, struct wl_registry *registry, - uint32_t name, const char *interface, uint32_t version) -{ - struct wsi_wl_display *display = data; - - if (strcmp(interface, "wl_drm") == 0) { - assert(display->drm == NULL); - - assert(version >= 2); - display->drm = wl_registry_bind(registry, name, &wl_drm_interface, 2); - - if (display->drm) - wl_drm_add_listener(display->drm, &drm_listener, display); - } -} - -static void -registry_handle_global_remove(void *data, struct wl_registry *registry, - uint32_t name) -{ /* No-op */ } - -static const struct wl_registry_listener registry_listener = { - registry_handle_global, - registry_handle_global_remove -}; - -static void -wsi_wl_display_destroy(struct wsi_wayland *wsi, struct wsi_wl_display *display) -{ - anv_vector_finish(&display->formats); - if (display->drm) - wl_drm_destroy(display->drm); - anv_free(&wsi->instance->alloc, display); -} - -static struct wsi_wl_display * -wsi_wl_display_create(struct wsi_wayland *wsi, struct wl_display *wl_display) -{ - struct wsi_wl_display *display = - anv_alloc(&wsi->instance->alloc, sizeof(*display), 8, - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); - if (!display) - return NULL; - - memset(display, 0, sizeof(*display)); - - display->display = wl_display; - - if 
(!anv_vector_init(&display->formats, sizeof(VkFormat), 8)) - goto fail; - - struct wl_registry *registry = wl_display_get_registry(wl_display); - if (!registry) - return NULL; - - wl_registry_add_listener(registry, &registry_listener, display); - - /* Round-trip to get the wl_drm global */ - wl_display_roundtrip(wl_display); - - if (!display->drm) - goto fail; - - /* Round-trip to get wl_drm formats and capabilities */ - wl_display_roundtrip(wl_display); - - /* We need prime support */ - if (!(display->capabilities & WL_DRM_CAPABILITY_PRIME)) - goto fail; - - /* We don't need this anymore */ - wl_registry_destroy(registry); - - return display; - -fail: - if (registry) - wl_registry_destroy(registry); - - wsi_wl_display_destroy(wsi, display); - return NULL; -} - -static struct wsi_wl_display * -wsi_wl_get_display(struct anv_instance *instance, struct wl_display *wl_display) -{ - struct wsi_wayland *wsi = - (struct wsi_wayland *)instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; - - pthread_mutex_lock(&wsi->mutex); - - struct hash_entry *entry = _mesa_hash_table_search(wsi->displays, - wl_display); - if (!entry) { - /* We're about to make a bunch of blocking calls. Let's drop the - * mutex for now so we don't block up too badly. - */ - pthread_mutex_unlock(&wsi->mutex); - - struct wsi_wl_display *display = wsi_wl_display_create(wsi, wl_display); - - pthread_mutex_lock(&wsi->mutex); - - entry = _mesa_hash_table_search(wsi->displays, wl_display); - if (entry) { - /* Oops, someone raced us to it */ - wsi_wl_display_destroy(wsi, display); - } else { - entry = _mesa_hash_table_insert(wsi->displays, wl_display, display); - } - } - - pthread_mutex_unlock(&wsi->mutex); - - return entry->data; -} - -VkBool32 anv_GetPhysicalDeviceWaylandPresentationSupportKHR( - VkPhysicalDevice physicalDevice, - uint32_t queueFamilyIndex, - struct wl_display* display) -{ - ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); - - return wsi_wl_get_display(physical_device->instance, display) != NULL; -} - -static VkResult -wsi_wl_surface_get_support(VkIcdSurfaceBase *surface, - struct anv_physical_device *device, - uint32_t queueFamilyIndex, - VkBool32* pSupported) -{ - *pSupported = true; - - return VK_SUCCESS; -} - -static const VkPresentModeKHR present_modes[] = { - VK_PRESENT_MODE_MAILBOX_KHR, - VK_PRESENT_MODE_FIFO_KHR, -}; - -static VkResult -wsi_wl_surface_get_capabilities(VkIcdSurfaceBase *surface, - struct anv_physical_device *device, - VkSurfaceCapabilitiesKHR* caps) -{ - caps->minImageCount = MIN_NUM_IMAGES; - caps->maxImageCount = 4; - caps->currentExtent = (VkExtent2D) { -1, -1 }; - caps->minImageExtent = (VkExtent2D) { 1, 1 }; - caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; - caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; - caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; - caps->maxImageArrayLayers = 1; - - caps->supportedCompositeAlpha = - VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR | - VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR; - - caps->supportedUsageFlags = - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - - return VK_SUCCESS; -} - -static VkResult -wsi_wl_surface_get_formats(VkIcdSurfaceBase *icd_surface, - struct anv_physical_device *device, - uint32_t* pSurfaceFormatCount, - VkSurfaceFormatKHR* pSurfaceFormats) -{ - VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface; - struct wsi_wl_display *display = - wsi_wl_get_display(device->instance,
surface->display); - - uint32_t count = anv_vector_length(&display->formats); - - if (pSurfaceFormats == NULL) { - *pSurfaceFormatCount = count; - return VK_SUCCESS; - } - - assert(*pSurfaceFormatCount >= count); - *pSurfaceFormatCount = count; - - VkFormat *f; - anv_vector_foreach(f, &display->formats) { - *(pSurfaceFormats++) = (VkSurfaceFormatKHR) { - .format = *f, - /* TODO: We should get this from the compositor somehow */ - .colorSpace = VK_COLORSPACE_SRGB_NONLINEAR_KHR, - }; - } - - return VK_SUCCESS; -} - -static VkResult -wsi_wl_surface_get_present_modes(VkIcdSurfaceBase *surface, - struct anv_physical_device *device, - uint32_t* pPresentModeCount, - VkPresentModeKHR* pPresentModes) -{ - if (pPresentModes == NULL) { - *pPresentModeCount = ARRAY_SIZE(present_modes); - return VK_SUCCESS; - } - - assert(*pPresentModeCount >= ARRAY_SIZE(present_modes)); - typed_memcpy(pPresentModes, present_modes, *pPresentModeCount); - *pPresentModeCount = ARRAY_SIZE(present_modes); - - return VK_SUCCESS; -} - -static VkResult -wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *surface, - struct anv_device *device, - const VkSwapchainCreateInfoKHR* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - struct anv_swapchain **swapchain); - -VkResult anv_CreateWaylandSurfaceKHR( - VkInstance _instance, - const VkWaylandSurfaceCreateInfoKHR* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSurfaceKHR* pSurface) -{ - ANV_FROM_HANDLE(anv_instance, instance, _instance); - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR); - - VkIcdSurfaceWayland *surface; - - surface = anv_alloc2(&instance->alloc, pAllocator, sizeof *surface, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (surface == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - surface->base.platform = VK_ICD_WSI_PLATFORM_WAYLAND; - surface->display = pCreateInfo->display; - surface->surface = pCreateInfo->surface; - - *pSurface = _VkIcdSurfaceBase_to_handle(&surface->base); - - return VK_SUCCESS; -} - -struct wsi_wl_image { - struct anv_image * image; - struct anv_device_memory * memory; - struct wl_buffer * buffer; - bool busy; -}; - -struct wsi_wl_swapchain { - struct anv_swapchain base; - - struct wsi_wl_display * display; - struct wl_event_queue * queue; - struct wl_surface * surface; - - VkExtent2D extent; - VkFormat vk_format; - uint32_t drm_format; - - VkPresentModeKHR present_mode; - bool fifo_ready; - - uint32_t image_count; - struct wsi_wl_image images[0]; -}; - -static VkResult -wsi_wl_swapchain_get_images(struct anv_swapchain *anv_chain, - uint32_t *pCount, VkImage *pSwapchainImages) -{ - struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; - - if (pSwapchainImages == NULL) { - *pCount = chain->image_count; - return VK_SUCCESS; - } - - assert(chain->image_count <= *pCount); - for (uint32_t i = 0; i < chain->image_count; i++) - pSwapchainImages[i] = anv_image_to_handle(chain->images[i].image); - - *pCount = chain->image_count; - - return VK_SUCCESS; -} - -static VkResult -wsi_wl_swapchain_acquire_next_image(struct anv_swapchain *anv_chain, - uint64_t timeout, - VkSemaphore semaphore, - uint32_t *image_index) -{ - struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; - - int ret = wl_display_dispatch_queue_pending(chain->display->display, - chain->queue); - /* XXX: I'm not sure if out-of-date is the right error here. If - * wl_display_dispatch_queue_pending fails it most likely means we got - * kicked by the server so this seems more-or-less correct. 
- */ - if (ret < 0) - return vk_error(VK_ERROR_OUT_OF_DATE_KHR); - - while (1) { - for (uint32_t i = 0; i < chain->image_count; i++) { - if (!chain->images[i].busy) { - /* We found a non-busy image */ - *image_index = i; - return VK_SUCCESS; - } - } - - /* This time we do a blocking dispatch because we can't go - * anywhere until we get an event. - */ - int ret = wl_display_roundtrip_queue(chain->display->display, - chain->queue); - if (ret < 0) - return vk_error(VK_ERROR_OUT_OF_DATE_KHR); - } -} - -static void -frame_handle_done(void *data, struct wl_callback *callback, uint32_t serial) -{ - struct wsi_wl_swapchain *chain = data; - - chain->fifo_ready = true; - - wl_callback_destroy(callback); -} - -static const struct wl_callback_listener frame_listener = { - frame_handle_done, -}; - -static VkResult -wsi_wl_swapchain_queue_present(struct anv_swapchain *anv_chain, - struct anv_queue *queue, - uint32_t image_index) -{ - struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; - - if (chain->present_mode == VK_PRESENT_MODE_FIFO_KHR) { - while (!chain->fifo_ready) { - int ret = wl_display_dispatch_queue(chain->display->display, - chain->queue); - if (ret < 0) - return vk_error(VK_ERROR_OUT_OF_DATE_KHR); - } - } - - assert(image_index < chain->image_count); - wl_surface_attach(chain->surface, chain->images[image_index].buffer, 0, 0); - wl_surface_damage(chain->surface, 0, 0, INT32_MAX, INT32_MAX); - - if (chain->present_mode == VK_PRESENT_MODE_FIFO_KHR) { - struct wl_callback *frame = wl_surface_frame(chain->surface); - wl_proxy_set_queue((struct wl_proxy *)frame, chain->queue); - wl_callback_add_listener(frame, &frame_listener, chain); - chain->fifo_ready = false; - } - - chain->images[image_index].busy = true; - wl_surface_commit(chain->surface); - wl_display_flush(chain->display->display); - - return VK_SUCCESS; -} - -static void -wsi_wl_image_finish(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image, - const VkAllocationCallbacks* pAllocator) -{ - VkDevice vk_device = anv_device_to_handle(chain->base.device); - anv_FreeMemory(vk_device, anv_device_memory_to_handle(image->memory), - pAllocator); - anv_DestroyImage(vk_device, anv_image_to_handle(image->image), - pAllocator); -} - -static void -buffer_handle_release(void *data, struct wl_buffer *buffer) -{ - struct wsi_wl_image *image = data; - - assert(image->buffer == buffer); - - image->busy = false; -} - -static const struct wl_buffer_listener buffer_listener = { - buffer_handle_release, -}; - -static VkResult -wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image, - const VkAllocationCallbacks* pAllocator) -{ - VkDevice vk_device = anv_device_to_handle(chain->base.device); - VkResult result; - - VkImage vk_image; - result = anv_image_create(vk_device, - &(struct anv_image_create_info) { - .isl_tiling_flags = ISL_TILING_X_BIT, - .stride = 0, - .vk_info = - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = chain->vk_format, - .extent = { - .width = chain->extent.width, - .height = chain->extent.height, - .depth = 1 - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - /* FIXME: Need a way to use X tiling to allow scanout */ - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - .flags = 0, - }}, - pAllocator, - &vk_image); - - if (result != VK_SUCCESS) - return result; - - image->image = anv_image_from_handle(vk_image); - assert(anv_format_is_color(image->image->format)); - - struct 
anv_surface *surface = &image->image->color_surface; - - VkDeviceMemory vk_memory; - result = anv_AllocateMemory(vk_device, - &(VkMemoryAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .allocationSize = image->image->size, - .memoryTypeIndex = 0, - }, - pAllocator, - &vk_memory); - - if (result != VK_SUCCESS) - goto fail_image; - - image->memory = anv_device_memory_from_handle(vk_memory); - image->memory->bo.is_winsys_bo = true; - - result = anv_BindImageMemory(vk_device, vk_image, vk_memory, 0); - - if (result != VK_SUCCESS) - goto fail_mem; - - int ret = anv_gem_set_tiling(chain->base.device, - image->memory->bo.gem_handle, - surface->isl.row_pitch, I915_TILING_X); - if (ret) { - /* FINISHME: Choose a better error. */ - result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); - goto fail_mem; - } - - int fd = anv_gem_handle_to_fd(chain->base.device, - image->memory->bo.gem_handle); - if (fd == -1) { - /* FINISHME: Choose a better error. */ - result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); - goto fail_mem; - } - - image->buffer = wl_drm_create_prime_buffer(chain->display->drm, - fd, /* name */ - chain->extent.width, - chain->extent.height, - chain->drm_format, - surface->offset, - surface->isl.row_pitch, - 0, 0, 0, 0 /* unused */); - wl_display_roundtrip(chain->display->display); - close(fd); - - wl_proxy_set_queue((struct wl_proxy *)image->buffer, chain->queue); - wl_buffer_add_listener(image->buffer, &buffer_listener, image); - - return VK_SUCCESS; - -fail_mem: - anv_FreeMemory(vk_device, vk_memory, pAllocator); -fail_image: - anv_DestroyImage(vk_device, vk_image, pAllocator); - - return result; -} - -static VkResult -wsi_wl_swapchain_destroy(struct anv_swapchain *anv_chain, - const VkAllocationCallbacks *pAllocator) -{ - struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain; - - for (uint32_t i = 0; i < chain->image_count; i++) { - if (chain->images[i].buffer) - wsi_wl_image_finish(chain, &chain->images[i], pAllocator); - } - - anv_free2(&chain->base.device->alloc, pAllocator, chain); - - return VK_SUCCESS; -} - -static VkResult -wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, - struct anv_device *device, - const VkSwapchainCreateInfoKHR* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - struct anv_swapchain **swapchain_out) -{ - VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface; - struct wsi_wl_swapchain *chain; - VkResult result; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); - - int num_images = pCreateInfo->minImageCount; - - assert(num_images >= MIN_NUM_IMAGES); - - /* For true mailbox mode, we need at least 4 images: - * 1) One to scan out from - * 2) One to have queued for scan-out - * 3) One to be currently held by the Wayland compositor - * 4) One to render to - */ - if (pCreateInfo->presentMode == VK_PRESENT_MODE_MAILBOX_KHR) - num_images = MAX2(num_images, 4); - - size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); - chain = anv_alloc2(&device->alloc, pAllocator, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (chain == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - chain->base.device = device; - chain->base.destroy = wsi_wl_swapchain_destroy; - chain->base.get_images = wsi_wl_swapchain_get_images; - chain->base.acquire_next_image = wsi_wl_swapchain_acquire_next_image; - chain->base.queue_present = wsi_wl_swapchain_queue_present; - - chain->surface = surface->surface; - chain->extent = pCreateInfo->imageExtent; - chain->vk_format = 
pCreateInfo->imageFormat; - chain->drm_format = wl_drm_format_for_vk_format(chain->vk_format, false); - - chain->present_mode = pCreateInfo->presentMode; - chain->fifo_ready = true; - - chain->image_count = num_images; - - /* Mark a bunch of stuff as NULL. This way we can just call - * destroy_swapchain for cleanup. - */ - for (uint32_t i = 0; i < chain->image_count; i++) - chain->images[i].buffer = NULL; - chain->queue = NULL; - - chain->display = wsi_wl_get_display(device->instance, surface->display); - if (!chain->display) - goto fail; - - chain->queue = wl_display_create_queue(chain->display->display); - if (!chain->queue) - goto fail; - - for (uint32_t i = 0; i < chain->image_count; i++) { - result = wsi_wl_image_init(chain, &chain->images[i], pAllocator); - if (result != VK_SUCCESS) - goto fail; - chain->images[i].busy = false; - } - - *swapchain_out = &chain->base; - - return VK_SUCCESS; - -fail: - wsi_wl_swapchain_destroy(&chain->base, pAllocator); - - return result; -} - -VkResult -anv_wl_init_wsi(struct anv_instance *instance) -{ - struct wsi_wayland *wsi; - VkResult result; - - wsi = anv_alloc(&instance->alloc, sizeof(*wsi), 8, - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); - if (!wsi) { - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail; - } - - wsi->instance = instance; - - int ret = pthread_mutex_init(&wsi->mutex, NULL); - if (ret != 0) { - if (ret == ENOMEM) { - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } else { - /* FINISHME: Choose a better error. */ - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - goto fail_alloc; - } - - wsi->displays = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - if (!wsi->displays) { - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail_mutex; - } - - wsi->base.get_support = wsi_wl_surface_get_support; - wsi->base.get_capabilities = wsi_wl_surface_get_capabilities; - wsi->base.get_formats = wsi_wl_surface_get_formats; - wsi->base.get_present_modes = wsi_wl_surface_get_present_modes; - wsi->base.create_swapchain = wsi_wl_surface_create_swapchain; - - instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND] = &wsi->base; - - return VK_SUCCESS; - -fail_mutex: - pthread_mutex_destroy(&wsi->mutex); - -fail_alloc: - anv_free(&instance->alloc, wsi); -fail: - instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND] = NULL; - - return result; -} - -void -anv_wl_finish_wsi(struct anv_instance *instance) -{ - struct wsi_wayland *wsi = - (struct wsi_wayland *)instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; - - if (wsi) { - _mesa_hash_table_destroy(wsi->displays, NULL); - - pthread_mutex_destroy(&wsi->mutex); - - anv_free(&instance->alloc, wsi); - } -} diff --git a/src/vulkan/anv_wsi_x11.c b/src/vulkan/anv_wsi_x11.c deleted file mode 100644 index 843a6b6..0000000 --- a/src/vulkan/anv_wsi_x11.c +++ /dev/null @@ -1,758 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include - -#include "anv_wsi.h" - -#include "util/hash_table.h" - -struct wsi_x11_connection { - bool has_dri3; - bool has_present; -}; - -struct wsi_x11 { - struct anv_wsi_interface base; - - pthread_mutex_t mutex; - /* Hash table of xcb_connection -> wsi_x11_connection mappings */ - struct hash_table *connections; -}; - -static struct wsi_x11_connection * -wsi_x11_connection_create(struct anv_instance *instance, xcb_connection_t *conn) -{ - xcb_query_extension_cookie_t dri3_cookie, pres_cookie; - xcb_query_extension_reply_t *dri3_reply, *pres_reply; - - struct wsi_x11_connection *wsi_conn = - anv_alloc(&instance->alloc, sizeof(*wsi_conn), 8, - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); - if (!wsi_conn) - return NULL; - - dri3_cookie = xcb_query_extension(conn, 4, "DRI3"); - pres_cookie = xcb_query_extension(conn, 7, "PRESENT"); - - dri3_reply = xcb_query_extension_reply(conn, dri3_cookie, NULL); - pres_reply = xcb_query_extension_reply(conn, pres_cookie, NULL); - if (dri3_reply == NULL || pres_reply == NULL) { - free(dri3_reply); - free(pres_reply); - anv_free(&instance->alloc, wsi_conn); - return NULL; - } - - wsi_conn->has_dri3 = dri3_reply->present != 0; - wsi_conn->has_present = pres_reply->present != 0; - - free(dri3_reply); - free(pres_reply); - - return wsi_conn; -} - -static void -wsi_x11_connection_destroy(struct anv_instance *instance, - struct wsi_x11_connection *conn) -{ - anv_free(&instance->alloc, conn); -} - -static struct wsi_x11_connection * -wsi_x11_get_connection(struct anv_instance *instance, xcb_connection_t *conn) -{ - struct wsi_x11 *wsi = - (struct wsi_x11 *)instance->wsi[VK_ICD_WSI_PLATFORM_XCB]; - - pthread_mutex_lock(&wsi->mutex); - - struct hash_entry *entry = _mesa_hash_table_search(wsi->connections, conn); - if (!entry) { - /* We're about to make a bunch of blocking calls. Let's drop the - * mutex for now so we don't block up too badly. 
- */ - pthread_mutex_unlock(&wsi->mutex); - - struct wsi_x11_connection *wsi_conn = - wsi_x11_connection_create(instance, conn); - - pthread_mutex_lock(&wsi->mutex); - - entry = _mesa_hash_table_search(wsi->connections, conn); - if (entry) { - /* Oops, someone raced us to it */ - wsi_x11_connection_destroy(instance, wsi_conn); - } else { - entry = _mesa_hash_table_insert(wsi->connections, conn, wsi_conn); - } - } - - pthread_mutex_unlock(&wsi->mutex); - - return entry->data; -} - -static const VkSurfaceFormatKHR formats[] = { - { .format = VK_FORMAT_B8G8R8A8_SRGB, }, -}; - -static const VkPresentModeKHR present_modes[] = { - VK_PRESENT_MODE_MAILBOX_KHR, -}; - -static xcb_screen_t * -get_screen_for_root(xcb_connection_t *conn, xcb_window_t root) -{ - xcb_screen_iterator_t screen_iter = - xcb_setup_roots_iterator(xcb_get_setup(conn)); - - for (; screen_iter.rem; xcb_screen_next (&screen_iter)) { - if (screen_iter.data->root == root) - return screen_iter.data; - } - - return NULL; -} - -static xcb_visualtype_t * -screen_get_visualtype(xcb_screen_t *screen, xcb_visualid_t visual_id, - unsigned *depth) -{ - xcb_depth_iterator_t depth_iter = - xcb_screen_allowed_depths_iterator(screen); - - for (; depth_iter.rem; xcb_depth_next (&depth_iter)) { - xcb_visualtype_iterator_t visual_iter = - xcb_depth_visuals_iterator (depth_iter.data); - - for (; visual_iter.rem; xcb_visualtype_next (&visual_iter)) { - if (visual_iter.data->visual_id == visual_id) { - if (depth) - *depth = depth_iter.data->depth; - return visual_iter.data; - } - } - } - - return NULL; -} - -static xcb_visualtype_t * -connection_get_visualtype(xcb_connection_t *conn, xcb_visualid_t visual_id, - unsigned *depth) -{ - xcb_screen_iterator_t screen_iter = - xcb_setup_roots_iterator(xcb_get_setup(conn)); - - /* For this we have to iterate over all of the screens which is rather - * annoying. Fortunately, there is probably only 1. - */ - for (; screen_iter.rem; xcb_screen_next (&screen_iter)) { - xcb_visualtype_t *visual = screen_get_visualtype(screen_iter.data, - visual_id, depth); - if (visual) - return visual; - } - - return NULL; -} - -static xcb_visualtype_t * -get_visualtype_for_window(xcb_connection_t *conn, xcb_window_t window, - unsigned *depth) -{ - xcb_query_tree_cookie_t tree_cookie; - xcb_get_window_attributes_cookie_t attrib_cookie; - xcb_query_tree_reply_t *tree; - xcb_get_window_attributes_reply_t *attrib; - - tree_cookie = xcb_query_tree(conn, window); - attrib_cookie = xcb_get_window_attributes(conn, window); - - tree = xcb_query_tree_reply(conn, tree_cookie, NULL); - attrib = xcb_get_window_attributes_reply(conn, attrib_cookie, NULL); - if (attrib == NULL || tree == NULL) { - free(attrib); - free(tree); - return NULL; - } - - xcb_window_t root = tree->root; - xcb_visualid_t visual_id = attrib->visual; - free(attrib); - free(tree); - - xcb_screen_t *screen = get_screen_for_root(conn, root); - if (screen == NULL) - return NULL; - - return screen_get_visualtype(screen, visual_id, depth); -} - -static bool -visual_has_alpha(xcb_visualtype_t *visual, unsigned depth) -{ - uint32_t rgb_mask = visual->red_mask | - visual->green_mask | - visual->blue_mask; - - uint32_t all_mask = 0xffffffff >> (32 - depth); - - /* Do we have bits left over after RGB? 
*/ - return (all_mask & ~rgb_mask) != 0; -} - -VkBool32 anv_GetPhysicalDeviceXcbPresentationSupportKHR( - VkPhysicalDevice physicalDevice, - uint32_t queueFamilyIndex, - xcb_connection_t* connection, - xcb_visualid_t visual_id) -{ - ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); - - struct wsi_x11_connection *wsi_conn = - wsi_x11_get_connection(device->instance, connection); - - if (!wsi_conn->has_dri3) { - fprintf(stderr, "vulkan: No DRI3 support\n"); - return false; - } - - unsigned visual_depth; - if (!connection_get_visualtype(connection, visual_id, &visual_depth)) - return false; - - if (visual_depth != 24 && visual_depth != 32) - return false; - - return true; -} - -static VkResult -x11_surface_get_support(VkIcdSurfaceBase *icd_surface, - struct anv_physical_device *device, - uint32_t queueFamilyIndex, - VkBool32* pSupported) -{ - VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface; - - struct wsi_x11_connection *wsi_conn = - wsi_x11_get_connection(device->instance, surface->connection); - if (!wsi_conn) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - if (!wsi_conn->has_dri3) { - fprintf(stderr, "vulkan: No DRI3 support\n"); - *pSupported = false; - return VK_SUCCESS; - } - - unsigned visual_depth; - if (!get_visualtype_for_window(surface->connection, surface->window, - &visual_depth)) { - *pSupported = false; - return VK_SUCCESS; - } - - if (visual_depth != 24 && visual_depth != 32) { - *pSupported = false; - return VK_SUCCESS; - } - - *pSupported = true; - return VK_SUCCESS; -} - -static VkResult -x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface, - struct anv_physical_device *device, - VkSurfaceCapabilitiesKHR *caps) -{ - VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface; - xcb_get_geometry_cookie_t geom_cookie; - xcb_generic_error_t *err; - xcb_get_geometry_reply_t *geom; - unsigned visual_depth; - - geom_cookie = xcb_get_geometry(surface->connection, surface->window); - - /* This does a round-trip. This is why we do get_geometry first and - * wait to read the reply until after we have a visual. - */ - xcb_visualtype_t *visual = - get_visualtype_for_window(surface->connection, surface->window, - &visual_depth); - - geom = xcb_get_geometry_reply(surface->connection, geom_cookie, &err); - if (geom) { - VkExtent2D extent = { geom->width, geom->height }; - caps->currentExtent = extent; - caps->minImageExtent = extent; - caps->maxImageExtent = extent; - } else { - /* This can happen if the client didn't wait for the configure event - * to come back from the compositor. In that case, we don't know the - * size of the window so we just return valid "I don't know" stuff. 
- */ - caps->currentExtent = (VkExtent2D) { -1, -1 }; - caps->minImageExtent = (VkExtent2D) { 1, 1 }; - caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; - } - free(err); - free(geom); - - if (visual_has_alpha(visual, visual_depth)) { - caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR | - VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR; - } else { - caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR | - VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; - } - - caps->minImageCount = 2; - caps->maxImageCount = 4; - caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; - caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; - caps->maxImageArrayLayers = 1; - caps->supportedUsageFlags = - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - - return VK_SUCCESS; -} - -static VkResult -x11_surface_get_formats(VkIcdSurfaceBase *surface, - struct anv_physical_device *device, - uint32_t *pSurfaceFormatCount, - VkSurfaceFormatKHR *pSurfaceFormats) -{ - if (pSurfaceFormats == NULL) { - *pSurfaceFormatCount = ARRAY_SIZE(formats); - return VK_SUCCESS; - } - - assert(*pSurfaceFormatCount >= ARRAY_SIZE(formats)); - typed_memcpy(pSurfaceFormats, formats, *pSurfaceFormatCount); - *pSurfaceFormatCount = ARRAY_SIZE(formats); - - return VK_SUCCESS; -} - -static VkResult -x11_surface_get_present_modes(VkIcdSurfaceBase *surface, - struct anv_physical_device *device, - uint32_t *pPresentModeCount, - VkPresentModeKHR *pPresentModes) -{ - if (pPresentModes == NULL) { - *pPresentModeCount = ARRAY_SIZE(present_modes); - return VK_SUCCESS; - } - - assert(*pPresentModeCount >= ARRAY_SIZE(present_modes)); - typed_memcpy(pPresentModes, present_modes, *pPresentModeCount); - *pPresentModeCount = ARRAY_SIZE(present_modes); - - return VK_SUCCESS; -} - -static VkResult -x11_surface_create_swapchain(VkIcdSurfaceBase *surface, - struct anv_device *device, - const VkSwapchainCreateInfoKHR* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - struct anv_swapchain **swapchain); - -VkResult anv_CreateXcbSurfaceKHR( - VkInstance _instance, - const VkXcbSurfaceCreateInfoKHR* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSurfaceKHR* pSurface) -{ - ANV_FROM_HANDLE(anv_instance, instance, _instance); - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR); - - VkIcdSurfaceXcb *surface; - - surface = anv_alloc2(&instance->alloc, pAllocator, sizeof *surface, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (surface == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - surface->base.platform = VK_ICD_WSI_PLATFORM_XCB; - surface->connection = pCreateInfo->connection; - surface->window = pCreateInfo->window; - - *pSurface = _VkIcdSurfaceBase_to_handle(&surface->base); - - return VK_SUCCESS; -} - -struct x11_image { - struct anv_image * image; - struct anv_device_memory * memory; - xcb_pixmap_t pixmap; - xcb_get_geometry_cookie_t geom_cookie; - bool busy; -}; - -struct x11_swapchain { - struct anv_swapchain base; - - xcb_connection_t * conn; - xcb_window_t window; - xcb_gc_t gc; - VkExtent2D extent; - uint32_t image_count; - uint32_t next_image; - struct x11_image images[0]; -}; - -static VkResult -x11_get_images(struct anv_swapchain *anv_chain, - uint32_t* pCount, VkImage *pSwapchainImages) -{ - struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; - - if (pSwapchainImages == NULL) { - *pCount = chain->image_count; - return VK_SUCCESS; - } - - 
assert(chain->image_count <= *pCount); - for (uint32_t i = 0; i < chain->image_count; i++) - pSwapchainImages[i] = anv_image_to_handle(chain->images[i].image); - - *pCount = chain->image_count; - - return VK_SUCCESS; -} - -static VkResult -x11_acquire_next_image(struct anv_swapchain *anv_chain, - uint64_t timeout, - VkSemaphore semaphore, - uint32_t *image_index) -{ - struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; - struct x11_image *image = &chain->images[chain->next_image]; - - if (image->busy) { - xcb_generic_error_t *err; - xcb_get_geometry_reply_t *geom = - xcb_get_geometry_reply(chain->conn, image->geom_cookie, &err); - if (!geom) { - free(err); - return vk_error(VK_ERROR_OUT_OF_DATE_KHR); - } - - if (geom->width != chain->extent.width || - geom->height != chain->extent.height) { - free(geom); - return vk_error(VK_ERROR_OUT_OF_DATE_KHR); - } - free(geom); - - image->busy = false; - } - - *image_index = chain->next_image; - chain->next_image = (chain->next_image + 1) % chain->image_count; - return VK_SUCCESS; -} - -static VkResult -x11_queue_present(struct anv_swapchain *anv_chain, - struct anv_queue *queue, - uint32_t image_index) -{ - struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; - struct x11_image *image = &chain->images[image_index]; - - assert(image_index < chain->image_count); - - xcb_void_cookie_t cookie; - - cookie = xcb_copy_area(chain->conn, - image->pixmap, - chain->window, - chain->gc, - 0, 0, - 0, 0, - chain->extent.width, - chain->extent.height); - xcb_discard_reply(chain->conn, cookie.sequence); - - image->geom_cookie = xcb_get_geometry(chain->conn, chain->window); - image->busy = true; - - xcb_flush(chain->conn); - - return VK_SUCCESS; -} - -static VkResult -x11_swapchain_destroy(struct anv_swapchain *anv_chain, - const VkAllocationCallbacks *pAllocator) -{ - struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; - xcb_void_cookie_t cookie; - - for (uint32_t i = 0; i < chain->image_count; i++) { - struct x11_image *image = &chain->images[i]; - - if (image->busy) - xcb_discard_reply(chain->conn, image->geom_cookie.sequence); - - cookie = xcb_free_pixmap(chain->conn, image->pixmap); - xcb_discard_reply(chain->conn, cookie.sequence); - - /* TODO: Delete images and free memory */ - } - - anv_free2(&chain->base.device->alloc, pAllocator, chain); - - return VK_SUCCESS; -} - -static VkResult -x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, - struct anv_device *device, - const VkSwapchainCreateInfoKHR *pCreateInfo, - const VkAllocationCallbacks* pAllocator, - struct anv_swapchain **swapchain_out) -{ - VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface; - struct x11_swapchain *chain; - xcb_void_cookie_t cookie; - VkResult result; - - int num_images = pCreateInfo->minImageCount; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); - - size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); - chain = anv_alloc2(&device->alloc, pAllocator, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (chain == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - chain->base.device = device; - chain->base.destroy = x11_swapchain_destroy; - chain->base.get_images = x11_get_images; - chain->base.acquire_next_image = x11_acquire_next_image; - chain->base.queue_present = x11_queue_present; - - chain->conn = surface->connection; - chain->window = surface->window; - chain->extent = pCreateInfo->imageExtent; - chain->image_count = num_images; - chain->next_image = 0; - - for (uint32_t i = 
0; i < chain->image_count; i++) { - VkDeviceMemory memory_h; - VkImage image_h; - struct anv_image *image; - struct anv_surface *surface; - struct anv_device_memory *memory; - - anv_image_create(anv_device_to_handle(device), - &(struct anv_image_create_info) { - .isl_tiling_flags = ISL_TILING_X_BIT, - .stride = 0, - .vk_info = - &(VkImageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = VK_IMAGE_TYPE_2D, - .format = pCreateInfo->imageFormat, - .extent = { - .width = pCreateInfo->imageExtent.width, - .height = pCreateInfo->imageExtent.height, - .depth = 1 - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = 1, - /* FIXME: Need a way to use X tiling to allow scanout */ - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - .flags = 0, - }}, - NULL, - &image_h); - - image = anv_image_from_handle(image_h); - assert(anv_format_is_color(image->format)); - - surface = &image->color_surface; - - anv_AllocateMemory(anv_device_to_handle(device), - &(VkMemoryAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .allocationSize = image->size, - .memoryTypeIndex = 0, - }, - NULL /* XXX: pAllocator */, - &memory_h); - - memory = anv_device_memory_from_handle(memory_h); - memory->bo.is_winsys_bo = true; - - anv_BindImageMemory(VK_NULL_HANDLE, anv_image_to_handle(image), - memory_h, 0); - - int ret = anv_gem_set_tiling(device, memory->bo.gem_handle, - surface->isl.row_pitch, I915_TILING_X); - if (ret) { - /* FINISHME: Choose a better error. */ - result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, - "set_tiling failed: %m"); - goto fail; - } - - int fd = anv_gem_handle_to_fd(device, memory->bo.gem_handle); - if (fd == -1) { - /* FINISHME: Choose a better error. */ - result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, - "handle_to_fd failed: %m"); - goto fail; - } - - uint32_t bpp = 32; - uint32_t depth = 24; - xcb_pixmap_t pixmap = xcb_generate_id(chain->conn); - - cookie = - xcb_dri3_pixmap_from_buffer_checked(chain->conn, - pixmap, - chain->window, - image->size, - pCreateInfo->imageExtent.width, - pCreateInfo->imageExtent.height, - surface->isl.row_pitch, - depth, bpp, fd); - - chain->images[i].image = image; - chain->images[i].memory = memory; - chain->images[i].pixmap = pixmap; - chain->images[i].busy = false; - - xcb_discard_reply(chain->conn, cookie.sequence); - } - - chain->gc = xcb_generate_id(chain->conn); - if (!chain->gc) { - /* FINISHME: Choose a better error. */ - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail; - } - - cookie = xcb_create_gc(chain->conn, - chain->gc, - chain->window, - XCB_GC_GRAPHICS_EXPOSURES, - (uint32_t []) { 0 }); - xcb_discard_reply(chain->conn, cookie.sequence); - - *swapchain_out = &chain->base; - - return VK_SUCCESS; - - fail: - return result; -} - -VkResult -anv_x11_init_wsi(struct anv_instance *instance) -{ - struct wsi_x11 *wsi; - VkResult result; - - wsi = anv_alloc(&instance->alloc, sizeof(*wsi), 8, - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); - if (!wsi) { - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail; - } - - int ret = pthread_mutex_init(&wsi->mutex, NULL); - if (ret != 0) { - if (ret == ENOMEM) { - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } else { - /* FINISHME: Choose a better error. 
*/ - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - - goto fail_alloc; - } - - wsi->connections = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - if (!wsi->connections) { - result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail_mutex; - } - - wsi->base.get_support = x11_surface_get_support; - wsi->base.get_capabilities = x11_surface_get_capabilities; - wsi->base.get_formats = x11_surface_get_formats; - wsi->base.get_present_modes = x11_surface_get_present_modes; - wsi->base.create_swapchain = x11_surface_create_swapchain; - - instance->wsi[VK_ICD_WSI_PLATFORM_XCB] = &wsi->base; - - return VK_SUCCESS; - -fail_mutex: - pthread_mutex_destroy(&wsi->mutex); -fail_alloc: - anv_free(&instance->alloc, wsi); -fail: - instance->wsi[VK_ICD_WSI_PLATFORM_XCB] = NULL; - - return result; -} - -void -anv_x11_finish_wsi(struct anv_instance *instance) -{ - struct wsi_x11 *wsi = - (struct wsi_x11 *)instance->wsi[VK_ICD_WSI_PLATFORM_XCB]; - - if (wsi) { - _mesa_hash_table_destroy(wsi->connections, NULL); - - pthread_mutex_destroy(&wsi->mutex); - - anv_free(&instance->alloc, wsi); - } -} diff --git a/src/vulkan/dev_icd.json.in b/src/vulkan/dev_icd.json.in deleted file mode 100644 index 8492036..0000000 --- a/src/vulkan/dev_icd.json.in +++ /dev/null @@ -1,7 +0,0 @@ -{ - "file_format_version": "1.0.0", - "ICD": { - "library_path": "@build_libdir@/libvulkan_intel.so", - "abi_versions": "1.0.3" - } -} diff --git a/src/vulkan/gen7_cmd_buffer.c b/src/vulkan/gen7_cmd_buffer.c deleted file mode 100644 index 23327ec..0000000 --- a/src/vulkan/gen7_cmd_buffer.c +++ /dev/null @@ -1,589 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -#include "genxml/gen7_pack.h" -#include "genxml/gen75_pack.h" - -static uint32_t -cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) -{ - static const uint32_t push_constant_opcodes[] = { - [MESA_SHADER_VERTEX] = 21, - [MESA_SHADER_TESS_CTRL] = 25, /* HS */ - [MESA_SHADER_TESS_EVAL] = 26, /* DS */ - [MESA_SHADER_GEOMETRY] = 22, - [MESA_SHADER_FRAGMENT] = 23, - [MESA_SHADER_COMPUTE] = 0, - }; - - VkShaderStageFlags flushed = 0; - - anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { - if (stage == MESA_SHADER_COMPUTE) - continue; - - struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); - - if (state.offset == 0) - continue; - - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_CONSTANT_VS, - ._3DCommandSubOpcode = push_constant_opcodes[stage], - .ConstantBody = { - .PointerToConstantBuffer0 = { .offset = state.offset }, - .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32), - }); - - flushed |= mesa_to_vk_shader_stage(stage); - } - - cmd_buffer->state.push_constants_dirty &= ~flushed; - - return flushed; -} - -GENX_FUNC(GEN7, GEN7) void -genX(cmd_buffer_emit_descriptor_pointers)(struct anv_cmd_buffer *cmd_buffer, - uint32_t stages) -{ - static const uint32_t sampler_state_opcodes[] = { - [MESA_SHADER_VERTEX] = 43, - [MESA_SHADER_TESS_CTRL] = 44, /* HS */ - [MESA_SHADER_TESS_EVAL] = 45, /* DS */ - [MESA_SHADER_GEOMETRY] = 46, - [MESA_SHADER_FRAGMENT] = 47, - [MESA_SHADER_COMPUTE] = 0, - }; - - static const uint32_t binding_table_opcodes[] = { - [MESA_SHADER_VERTEX] = 38, - [MESA_SHADER_TESS_CTRL] = 39, - [MESA_SHADER_TESS_EVAL] = 40, - [MESA_SHADER_GEOMETRY] = 41, - [MESA_SHADER_FRAGMENT] = 42, - [MESA_SHADER_COMPUTE] = 0, - }; - - anv_foreach_stage(s, stages) { - if (cmd_buffer->state.samplers[s].alloc_size > 0) { - anv_batch_emit(&cmd_buffer->batch, - GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS, - ._3DCommandSubOpcode = sampler_state_opcodes[s], - .PointertoVSSamplerState = cmd_buffer->state.samplers[s].offset); - } - - /* Always emit binding table pointers if we're asked to, since on SKL - * this is what flushes push constants. */ - anv_batch_emit(&cmd_buffer->batch, - GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS, - ._3DCommandSubOpcode = binding_table_opcodes[s], - .PointertoVSBindingTable = cmd_buffer->state.binding_tables[s].offset); - } -} - -GENX_FUNC(GEN7, GEN7) uint32_t -genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer) -{ - VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty & - cmd_buffer->state.pipeline->active_stages; - - VkResult result = VK_SUCCESS; - anv_foreach_stage(s, dirty) { - result = anv_cmd_buffer_emit_samplers(cmd_buffer, s, - &cmd_buffer->state.samplers[s]); - if (result != VK_SUCCESS) - break; - result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s, - &cmd_buffer->state.binding_tables[s]); - if (result != VK_SUCCESS) - break; - } - - if (result != VK_SUCCESS) { - assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY); - - result = anv_cmd_buffer_new_binding_table_block(cmd_buffer); - assert(result == VK_SUCCESS); - - /* Re-emit state base addresses so we get the new surface state base - * address before we start emitting binding tables etc. 
- */ - anv_cmd_buffer_emit_state_base_address(cmd_buffer); - - /* Re-emit all active binding tables */ - dirty |= cmd_buffer->state.pipeline->active_stages; - anv_foreach_stage(s, dirty) { - result = anv_cmd_buffer_emit_samplers(cmd_buffer, s, - &cmd_buffer->state.samplers[s]); - if (result != VK_SUCCESS) - return result; - result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s, - &cmd_buffer->state.binding_tables[s]); - if (result != VK_SUCCESS) - return result; - } - } - - cmd_buffer->state.descriptors_dirty &= ~dirty; - - return dirty; -} - -static inline int64_t -clamp_int64(int64_t x, int64_t min, int64_t max) -{ - if (x < min) - return min; - else if (x < max) - return x; - else - return max; -} - -static void -emit_scissor_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t count, const VkRect2D *scissors) -{ - struct anv_state scissor_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); - - for (uint32_t i = 0; i < count; i++) { - const VkRect2D *s = &scissors[i]; - - /* Since xmax and ymax are inclusive, we have to have xmax < xmin or - * ymax < ymin for empty clips. In case clip x, y, width height are all - * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't - * what we want. Just special case empty clips and produce a canonical - * empty clip. */ - static const struct GEN7_SCISSOR_RECT empty_scissor = { - .ScissorRectangleYMin = 1, - .ScissorRectangleXMin = 1, - .ScissorRectangleYMax = 0, - .ScissorRectangleXMax = 0 - }; - - const int max = 0xffff; - struct GEN7_SCISSOR_RECT scissor = { - /* Do this math using int64_t so overflow gets clamped correctly. */ - .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max), - .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max), - .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max), - .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max) - }; - - if (s->extent.width <= 0 || s->extent.height <= 0) { - GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 8, - &empty_scissor); - } else { - GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 8, &scissor); - } - } - - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_SCISSOR_STATE_POINTERS, - .ScissorRectPointer = scissor_state.offset); - - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(scissor_state); -} - -GENX_FUNC(GEN7, GEN7) void -genX(cmd_buffer_emit_scissor)(struct anv_cmd_buffer *cmd_buffer) -{ - if (cmd_buffer->state.dynamic.scissor.count > 0) { - emit_scissor_state(cmd_buffer, cmd_buffer->state.dynamic.scissor.count, - cmd_buffer->state.dynamic.scissor.scissors); - } else { - /* Emit a default scissor based on the currently bound framebuffer */ - emit_scissor_state(cmd_buffer, 1, - &(VkRect2D) { - .offset = { .x = 0, .y = 0, }, - .extent = { - .width = cmd_buffer->state.framebuffer->width, - .height = cmd_buffer->state.framebuffer->height, - }, - }); - } -} - -static const uint32_t vk_to_gen_index_type[] = { - [VK_INDEX_TYPE_UINT16] = INDEX_WORD, - [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, -}; - -static const uint32_t restart_index_for_type[] = { - [VK_INDEX_TYPE_UINT16] = UINT16_MAX, - [VK_INDEX_TYPE_UINT32] = UINT32_MAX, -}; - -void genX(CmdBindIndexBuffer)( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - VkIndexType indexType) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; - if (ANV_IS_HASWELL) - 
cmd_buffer->state.restart_index = restart_index_for_type[indexType]; - cmd_buffer->state.gen7.index_buffer = buffer; - cmd_buffer->state.gen7.index_type = vk_to_gen_index_type[indexType]; - cmd_buffer->state.gen7.index_offset = offset; -} - -static VkResult -flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct anv_state surfaces = { 0, }, samplers = { 0, }; - VkResult result; - - result = anv_cmd_buffer_emit_samplers(cmd_buffer, - MESA_SHADER_COMPUTE, &samplers); - if (result != VK_SUCCESS) - return result; - result = anv_cmd_buffer_emit_binding_table(cmd_buffer, - MESA_SHADER_COMPUTE, &surfaces); - if (result != VK_SUCCESS) - return result; - - struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); - - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; - const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; - - unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; - unsigned push_constant_data_size = - (prog_data->nr_params + local_id_dwords) * 4; - unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); - unsigned push_constant_regs = reg_aligned_constant_size / 32; - - if (push_state.alloc_size) { - anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), - .CURBETotalDataLength = push_state.alloc_size, - .CURBEDataStartAddress = push_state.offset); - } - - assert(prog_data->total_shared <= 64 * 1024); - uint32_t slm_size = 0; - if (prog_data->total_shared > 0) { - /* slm_size is in 4k increments, but must be a power of 2. */ - slm_size = 4 * 1024; - while (slm_size < prog_data->total_shared) - slm_size <<= 1; - slm_size /= 4 * 1024; - } - - struct anv_state state = - anv_state_pool_emit(&device->dynamic_state_pool, - GEN7_INTERFACE_DESCRIPTOR_DATA, 64, - .KernelStartPointer = pipeline->cs_simd, - .BindingTablePointer = surfaces.offset, - .SamplerStatePointer = samplers.offset, - .ConstantURBEntryReadLength = - push_constant_regs, - .ConstantURBEntryReadOffset = 0, - .BarrierEnable = cs_prog_data->uses_barrier, - .SharedLocalMemorySize = slm_size, - .NumberofThreadsinGPGPUThreadGroup = - pipeline->cs_thread_width_max); - - const uint32_t size = GEN7_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); - anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD, - .InterfaceDescriptorTotalLength = size, - .InterfaceDescriptorDataStartAddress = state.offset); - - return VK_SUCCESS; -} - -void -genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - VkResult result; - - assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); - - if (cmd_buffer->state.current_pipeline != GPGPU) { - anv_batch_emit(&cmd_buffer->batch, GEN7_PIPELINE_SELECT, - .PipelineSelection = GPGPU); - cmd_buffer->state.current_pipeline = GPGPU; - } - - if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - - if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || - (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { - /* FIXME: figure out descriptors for gen7 */ - result = flush_compute_descriptor_set(cmd_buffer); - assert(result == VK_SUCCESS); - cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; - } - - cmd_buffer->state.compute_dirty = 0; -} - -void 
-genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - uint32_t *p; - - uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; - - assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); - - genX(flush_pipeline_select_3d)(cmd_buffer); - - if (vb_emit) { - const uint32_t num_buffers = __builtin_popcount(vb_emit); - const uint32_t num_dwords = 1 + num_buffers * 4; - - p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, - GEN7_3DSTATE_VERTEX_BUFFERS); - uint32_t vb, i = 0; - for_each_bit(vb, vb_emit) { - struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; - uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; - - struct GEN7_VERTEX_BUFFER_STATE state = { - .VertexBufferIndex = vb, - .BufferAccessType = pipeline->instancing_enable[vb] ? INSTANCEDATA : VERTEXDATA, - .VertexBufferMemoryObjectControlState = GEN7_MOCS, - .AddressModifyEnable = true, - .BufferPitch = pipeline->binding_stride[vb], - .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, - .EndAddress = { buffer->bo, buffer->offset + buffer->size - 1}, - .InstanceDataStepRate = 1 - }; - - GEN7_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state); - i++; - } - } - - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { - /* If somebody compiled a pipeline after starting a command buffer the - * scratch bo may have grown since we started this cmd buffer (and - * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, - * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ - if (cmd_buffer->state.scratch_size < pipeline->total_scratch) - gen7_cmd_buffer_emit_state_base_address(cmd_buffer); - - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - } - - if (cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_VERTEX_BIT || - cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_VERTEX_BIT) { - /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1: - * - * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth - * stall needs to be sent just prior to any 3DSTATE_VS, - * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS, - * 3DSTATE_BINDING_TABLE_POINTER_VS, - * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one - * PIPE_CONTROL needs to be sent before any combination of VS - * associated 3DSTATE." - */ - anv_batch_emit(&cmd_buffer->batch, GEN7_PIPE_CONTROL, - .DepthStallEnable = true, - .PostSyncOperation = WriteImmediateData, - .Address = { &cmd_buffer->device->workaround_bo, 0 }); - } - - uint32_t dirty = 0; - if (cmd_buffer->state.descriptors_dirty) { - dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); - gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); - } - - if (cmd_buffer->state.push_constants_dirty) - cmd_buffer_flush_push_constants(cmd_buffer); - - /* We use the gen8 state here because it only contains the additional - * min/max fields and, since they occur at the end of the packet and - * don't change the stride, they work on gen7 too. 
- */ - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) - gen8_cmd_buffer_emit_viewport(cmd_buffer); - - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) - gen7_cmd_buffer_emit_scissor(cmd_buffer); - - if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | - ANV_CMD_DIRTY_RENDER_TARGETS | - ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH | - ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) { - - bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || - cmd_buffer->state.dynamic.depth_bias.slope != 0.0f; - - const struct anv_image_view *iview = - anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); - const struct anv_image *image = iview ? iview->image : NULL; - const uint32_t depth_format = image ? - isl_surf_get_depth_format(&cmd_buffer->device->isl_dev, - &image->depth_surface.isl) : D16_UNORM; - - uint32_t sf_dw[GEN7_3DSTATE_SF_length]; - struct GEN7_3DSTATE_SF sf = { - GEN7_3DSTATE_SF_header, - .DepthBufferSurfaceFormat = depth_format, - .LineWidth = cmd_buffer->state.dynamic.line_width, - .GlobalDepthOffsetEnableSolid = enable_bias, - .GlobalDepthOffsetEnableWireframe = enable_bias, - .GlobalDepthOffsetEnablePoint = enable_bias, - .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias, - .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope, - .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp - }; - GEN7_3DSTATE_SF_pack(NULL, sf_dw, &sf); - - anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen7.sf); - } - - if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | - ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { - struct anv_state cc_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - GEN7_COLOR_CALC_STATE_length * 4, - 64); - struct GEN7_COLOR_CALC_STATE cc = { - .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], - .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], - .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], - .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], - .StencilReferenceValue = - cmd_buffer->state.dynamic.stencil_reference.front, - .BackFaceStencilReferenceValue = - cmd_buffer->state.dynamic.stencil_reference.back, - }; - GEN7_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(cc_state); - - anv_batch_emit(&cmd_buffer->batch, - GEN7_3DSTATE_CC_STATE_POINTERS, - .ColorCalcStatePointer = cc_state.offset); - } - - if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | - ANV_CMD_DIRTY_RENDER_TARGETS | - ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | - ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { - uint32_t depth_stencil_dw[GEN7_DEPTH_STENCIL_STATE_length]; - - const struct anv_image_view *iview = - anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); - - struct GEN7_DEPTH_STENCIL_STATE depth_stencil = { - .StencilBufferWriteEnable = iview && (iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT), - - .StencilTestMask = - cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff, - .StencilWriteMask = - cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff, - - .BackfaceStencilTestMask = - cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff, - .BackfaceStencilWriteMask = - cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, - }; - GEN7_DEPTH_STENCIL_STATE_pack(NULL, depth_stencil_dw, &depth_stencil); - - struct anv_state ds_state = - anv_cmd_buffer_merge_dynamic(cmd_buffer, depth_stencil_dw, - pipeline->gen7.depth_stencil_state, - 
GEN7_DEPTH_STENCIL_STATE_length, 64); - - anv_batch_emit(&cmd_buffer->batch, - GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS, - .PointertoDEPTH_STENCIL_STATE = ds_state.offset); - } - - if (cmd_buffer->state.gen7.index_buffer && - cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | - ANV_CMD_DIRTY_INDEX_BUFFER)) { - struct anv_buffer *buffer = cmd_buffer->state.gen7.index_buffer; - uint32_t offset = cmd_buffer->state.gen7.index_offset; - - if (ANV_IS_HASWELL) { - anv_batch_emit(&cmd_buffer->batch, GEN75_3DSTATE_VF, - .IndexedDrawCutIndexEnable = pipeline->primitive_restart, - .CutIndex = cmd_buffer->state.restart_index); - } - - anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_INDEX_BUFFER, - .CutIndexEnable = pipeline->primitive_restart, - .IndexFormat = cmd_buffer->state.gen7.index_type, - .MemoryObjectControlState = GEN7_MOCS, - .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, - .BufferEndingAddress = { buffer->bo, buffer->offset + buffer->size }); - } - - cmd_buffer->state.vb_dirty &= ~vb_emit; - cmd_buffer->state.dirty = 0; -} - -void genX(CmdSetEvent)( - VkCommandBuffer commandBuffer, - VkEvent event, - VkPipelineStageFlags stageMask) -{ - stub(); -} - -void genX(CmdResetEvent)( - VkCommandBuffer commandBuffer, - VkEvent event, - VkPipelineStageFlags stageMask) -{ - stub(); -} - -void genX(CmdWaitEvents)( - VkCommandBuffer commandBuffer, - uint32_t eventCount, - const VkEvent* pEvents, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - uint32_t memoryBarrierCount, - const VkMemoryBarrier* pMemoryBarriers, - uint32_t bufferMemoryBarrierCount, - const VkBufferMemoryBarrier* pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier* pImageMemoryBarriers) -{ - stub(); -} diff --git a/src/vulkan/gen7_pipeline.c b/src/vulkan/gen7_pipeline.c deleted file mode 100644 index 7c054fa..0000000 --- a/src/vulkan/gen7_pipeline.c +++ /dev/null @@ -1,410 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -#include "genxml/gen7_pack.h" -#include "genxml/gen75_pack.h" - -#include "genX_pipeline_util.h" - -static void -gen7_emit_rs_state(struct anv_pipeline *pipeline, - const VkPipelineRasterizationStateCreateInfo *info, - const struct anv_graphics_pipeline_create_info *extra) -{ - struct GEN7_3DSTATE_SF sf = { - GEN7_3DSTATE_SF_header, - - /* LegacyGlobalDepthBiasEnable */ - - .StatisticsEnable = true, - .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode], - .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], - .ViewTransformEnable = !(extra && extra->disable_viewport), - .FrontWinding = vk_to_gen_front_face[info->frontFace], - /* bool AntiAliasingEnable; */ - - .CullMode = vk_to_gen_cullmode[info->cullMode], - - /* uint32_t LineEndCapAntialiasingRegionWidth; */ - .ScissorRectangleEnable = !(extra && extra->disable_scissor), - - /* uint32_t MultisampleRasterizationMode; */ - /* bool LastPixelEnable; */ - - .TriangleStripListProvokingVertexSelect = 0, - .LineStripListProvokingVertexSelect = 0, - .TriangleFanProvokingVertexSelect = 0, - - /* uint32_t AALineDistanceMode; */ - /* uint32_t VertexSubPixelPrecisionSelect; */ - .UsePointWidthState = !pipeline->writes_point_size, - .PointWidth = 1.0, - }; - - GEN7_3DSTATE_SF_pack(NULL, &pipeline->gen7.sf, &sf); -} - -static void -gen7_emit_ds_state(struct anv_pipeline *pipeline, - const VkPipelineDepthStencilStateCreateInfo *info) -{ - if (info == NULL) { - /* We're going to OR this together with the dynamic state. We need - * to make sure it's initialized to something useful. - */ - memset(pipeline->gen7.depth_stencil_state, 0, - sizeof(pipeline->gen7.depth_stencil_state)); - return; - } - - struct GEN7_DEPTH_STENCIL_STATE state = { - .DepthTestEnable = info->depthTestEnable, - .DepthBufferWriteEnable = info->depthWriteEnable, - .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], - .DoubleSidedStencilEnable = true, - - .StencilTestEnable = info->stencilTestEnable, - .StencilFailOp = vk_to_gen_stencil_op[info->front.failOp], - .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp], - .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp], - .StencilTestFunction = vk_to_gen_compare_op[info->front.compareOp], - - .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.failOp], - .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.passOp], - .BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[info->back.depthFailOp], - .BackFaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp], - }; - - GEN7_DEPTH_STENCIL_STATE_pack(NULL, &pipeline->gen7.depth_stencil_state, &state); -} - -static void -gen7_emit_cb_state(struct anv_pipeline *pipeline, - const VkPipelineColorBlendStateCreateInfo *info, - const VkPipelineMultisampleStateCreateInfo *ms_info) -{ - struct anv_device *device = pipeline->device; - - if (info == NULL || info->attachmentCount == 0) { - pipeline->blend_state = - anv_state_pool_emit(&device->dynamic_state_pool, - GEN7_BLEND_STATE, 64, - .ColorBufferBlendEnable = false, - .WriteDisableAlpha = true, - .WriteDisableRed = true, - .WriteDisableGreen = true, - .WriteDisableBlue = true); - } else { - /* FIXME-GEN7: All render targets share blend state settings on gen7, we - * can't implement this. 
- */ - const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0]; - pipeline->blend_state = - anv_state_pool_emit(&device->dynamic_state_pool, - GEN7_BLEND_STATE, 64, - - .ColorBufferBlendEnable = a->blendEnable, - .IndependentAlphaBlendEnable = true, /* FIXME: yes? */ - .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], - - .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], - .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], - - .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], - .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], - .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], - .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, - -# if 0 - bool AlphaToOneEnable; - bool AlphaToCoverageDitherEnable; -# endif - - .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), - .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), - .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), - .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), - - .LogicOpEnable = info->logicOpEnable, - .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], - -# if 0 - bool AlphaTestEnable; - uint32_t AlphaTestFunction; - bool ColorDitherEnable; - uint32_t XDitherOffset; - uint32_t YDitherOffset; - uint32_t ColorClampRange; - bool PreBlendColorClampEnable; - bool PostBlendColorClampEnable; -# endif - ); - } - - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_BLEND_STATE_POINTERS, - .BlendStatePointer = pipeline->blend_state.offset); -} - -GENX_FUNC(GEN7, GEN75) VkResult -genX(graphics_pipeline_create)( - VkDevice _device, - struct anv_pipeline_cache * cache, - const VkGraphicsPipelineCreateInfo* pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks* pAllocator, - VkPipeline* pPipeline) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_pipeline *pipeline; - VkResult result; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); - - pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pipeline == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - result = anv_pipeline_init(pipeline, device, cache, - pCreateInfo, extra, pAllocator); - if (result != VK_SUCCESS) { - anv_free2(&device->alloc, pAllocator, pipeline); - return result; - } - - assert(pCreateInfo->pVertexInputState); - emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra); - - assert(pCreateInfo->pRasterizationState); - gen7_emit_rs_state(pipeline, pCreateInfo->pRasterizationState, extra); - - gen7_emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); - - gen7_emit_cb_state(pipeline, pCreateInfo->pColorBlendState, - pCreateInfo->pMultisampleState); - - emit_urb_setup(pipeline); - - const VkPipelineRasterizationStateCreateInfo *rs_info = - pCreateInfo->pRasterizationState; - - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_CLIP, - .FrontWinding = vk_to_gen_front_face[rs_info->frontFace], - .CullMode = vk_to_gen_cullmode[rs_info->cullMode], - .ClipEnable = true, - .APIMode = APIMODE_OGL, - .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), - .ClipMode = CLIPMODE_NORMAL, - .TriangleStripListProvokingVertexSelect = 0, - .LineStripListProvokingVertexSelect = 0, - .TriangleFanProvokingVertexSelect = 0, - .MinimumPointWidth = 0.125, - .MaximumPointWidth = 255.875, - .MaximumVPIndex = 
pCreateInfo->pViewportState->viewportCount - 1); - - if (pCreateInfo->pMultisampleState && - pCreateInfo->pMultisampleState->rasterizationSamples > 1) - anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); - - uint32_t samples = 1; - uint32_t log2_samples = __builtin_ffs(samples) - 1; - - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_MULTISAMPLE, - .PixelLocation = PIXLOC_CENTER, - .NumberofMultisamples = log2_samples); - - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SAMPLE_MASK, - .SampleMask = 0xff); - - const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; - /* The last geometry producing stage will set urb_offset and urb_length, - * which we use in 3DSTATE_SBE. Skip the VUE header and position slots. */ - uint32_t urb_offset = 1; - uint32_t urb_length = (vue_prog_data->vue_map.num_slots + 1) / 2 - urb_offset; - -#if 0 - /* From gen7_vs_state.c */ - - /** - * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > - * Geometry > Geometry Shader > State: - * - * "Note: Because of corruption in IVB:GT2, software needs to flush the - * whole fixed function pipeline when the GS enable changes value in - * the 3DSTATE_GS." - * - * The hardware architects have clarified that in this context "flush the - * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS - * Stall" bit set. - */ - if (!brw->is_haswell && !brw->is_baytrail) - gen7_emit_vs_workaround_flush(brw); -#endif - - if (pipeline->vs_vec4 == NO_KERNEL || (extra && extra->disable_vs)) - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), .VSFunctionEnable = false); - else - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), - .KernelStartPointer = pipeline->vs_vec4, - .ScratchSpaceBaseOffset = pipeline->scratch_start[MESA_SHADER_VERTEX], - .PerThreadScratchSpace = scratch_space(&vue_prog_data->base), - - .DispatchGRFStartRegisterforURBData = - vue_prog_data->base.dispatch_grf_start_reg, - .VertexURBEntryReadLength = vue_prog_data->urb_read_length, - .VertexURBEntryReadOffset = 0, - - .MaximumNumberofThreads = device->info.max_vs_threads - 1, - .StatisticsEnable = true, - .VSFunctionEnable = true); - - const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; - - if (pipeline->gs_kernel == NO_KERNEL || (extra && extra->disable_vs)) { - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .GSEnable = false); - } else { - urb_offset = 1; - urb_length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - urb_offset; - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), - .KernelStartPointer = pipeline->gs_kernel, - .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY], - .PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base), - - .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, - .OutputTopology = gs_prog_data->output_topology, - .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, - .IncludeVertexHandles = gs_prog_data->base.include_vue_handles, - .DispatchGRFStartRegisterforURBData = - gs_prog_data->base.base.dispatch_grf_start_reg, - - .MaximumNumberofThreads = device->info.max_gs_threads - 1, - /* This in the next dword on HSW. 
*/ - .ControlDataFormat = gs_prog_data->control_data_format, - .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, - .InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1, - .DispatchMode = gs_prog_data->base.dispatch_mode, - .GSStatisticsEnable = true, - .IncludePrimitiveID = gs_prog_data->include_primitive_id, -# if (ANV_IS_HASWELL) - .ReorderMode = REORDER_TRAILING, -# else - .ReorderEnable = true, -# endif - .GSEnable = true); - } - - if (pipeline->ps_ksp0 == NO_KERNEL) { - anv_finishme("disabling ps"); - - /* FIXME: generated header doesn't emit attr swizzle fields */ - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SBE); - - /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). */ - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM, - .StatisticsEnable = true, - .ThreadDispatchEnable = false, - .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ - .LineAntialiasingRegionWidth = 1, /* 1.0 pixels */ - .EarlyDepthStencilControl = EDSC_NORMAL, - .PointRasterizationRule = RASTRULE_UPPER_RIGHT); - - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); - - } else { - const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; - if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 || - wm_prog_data->urb_setup[VARYING_SLOT_BFC1] != -1) - anv_finishme("two-sided color needs sbe swizzling setup"); - if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] != -1) - anv_finishme("primitive_id needs sbe swizzling setup"); - - /* FIXME: generated header doesn't emit attr swizzle fields */ - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_SBE, - .NumberofSFOutputAttributes = pipeline->wm_prog_data.num_varying_inputs, - .VertexURBEntryReadLength = urb_length, - .VertexURBEntryReadOffset = urb_offset, - .PointSpriteTextureCoordinateOrigin = UPPERLEFT); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), - .KernelStartPointer0 = pipeline->ps_ksp0, - .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT], - .PerThreadScratchSpace = scratch_space(&wm_prog_data->base), - - .MaximumNumberofThreads = device->info.max_wm_threads - 1, - .PushConstantEnable = wm_prog_data->base.nr_params > 0, - .AttributeEnable = wm_prog_data->num_varying_inputs > 0, - .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, - - .RenderTargetFastClearEnable = false, - .DualSourceBlendEnable = false, - .RenderTargetResolveEnable = false, - - .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? - POSOFFSET_SAMPLE : POSOFFSET_NONE, - - ._32PixelDispatchEnable = false, - ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, - ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, - - .DispatchGRFStartRegisterforConstantSetupData0 = pipeline->ps_grf_start0, - .DispatchGRFStartRegisterforConstantSetupData1 = 0, - .DispatchGRFStartRegisterforConstantSetupData2 = pipeline->ps_grf_start2, - -#if 0 - /* Haswell requires the sample mask to be set in this packet as well as - * in 3DSTATE_SAMPLE_MASK; the values should match. */ - /* _NEW_BUFFERS, _NEW_MULTISAMPLE */ -#endif - - .KernelStartPointer1 = 0, - .KernelStartPointer2 = pipeline->ps_ksp2); - - /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). 
*/ - anv_batch_emit(&pipeline->batch, GEN7_3DSTATE_WM, - .StatisticsEnable = true, - .ThreadDispatchEnable = true, - .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */ - .LineAntialiasingRegionWidth = 1, /* 1.0 pixels */ - .EarlyDepthStencilControl = EDSC_NORMAL, - .PointRasterizationRule = RASTRULE_UPPER_RIGHT, - .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, - .PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth, - .PixelShaderUsesSourceW = wm_prog_data->uses_src_w, - .PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask, - .BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes); - } - - *pPipeline = anv_pipeline_to_handle(pipeline); - - return VK_SUCCESS; -} diff --git a/src/vulkan/gen7_state.c b/src/vulkan/gen7_state.c deleted file mode 100644 index 77bdb75..0000000 --- a/src/vulkan/gen7_state.c +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -#include "genxml/gen7_pack.h" -#include "genxml/gen75_pack.h" - -#include "genX_state_util.h" - -VkResult -genX(init_device_state)(struct anv_device *device) -{ - struct anv_batch batch; - - uint32_t cmds[64]; - batch.start = batch.next = cmds; - batch.end = (void *) cmds + sizeof(cmds); - - anv_batch_emit(&batch, GEN7_PIPELINE_SELECT, - .PipelineSelection = _3D); - - anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS), - .StatisticsEnable = true); - anv_batch_emit(&batch, GENX(3DSTATE_HS), .Enable = false); - anv_batch_emit(&batch, GENX(3DSTATE_TE), .TEEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_DS), .DSFunctionEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); - anv_batch_emit(&batch, GEN7_3DSTATE_AA_LINE_PARAMETERS); - anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END)); - - assert(batch.next <= batch.end); - - return anv_device_submit_simple_batch(device, &batch); -} - -GENX_FUNC(GEN7, GEN75) void -genX(fill_buffer_surface_state)(void *state, enum isl_format format, - uint32_t offset, uint32_t range, - uint32_t stride) -{ - uint32_t num_elements = range / stride; - - struct GENX(RENDER_SURFACE_STATE) surface_state = { - .SurfaceType = SURFTYPE_BUFFER, - .SurfaceFormat = format, - .SurfaceVerticalAlignment = VALIGN_4, - .SurfaceHorizontalAlignment = HALIGN_4, - .TiledSurface = false, - .RenderCacheReadWriteMode = false, - .SurfaceObjectControlState = GENX(MOCS), - .Height = ((num_elements - 1) >> 7) & 0x3fff, - .Width = (num_elements - 1) & 0x7f, - .Depth = ((num_elements - 1) >> 21) & 0x3f, - .SurfacePitch = stride - 1, -# if (ANV_IS_HASWELL) - .ShaderChannelSelectRed = SCS_RED, - .ShaderChannelSelectGreen = SCS_GREEN, - .ShaderChannelSelectBlue = SCS_BLUE, - .ShaderChannelSelectAlpha = SCS_ALPHA, -# endif - .SurfaceBaseAddress = { NULL, offset }, - }; - - GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state); -} - -VkResult genX(CreateSampler)( - VkDevice _device, - const VkSamplerCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSampler* pSampler) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_sampler *sampler; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); - - sampler = anv_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!sampler) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - struct GEN7_SAMPLER_STATE sampler_state = { - .SamplerDisable = false, - .TextureBorderColorMode = DX10OGL, - .LODPreClampEnable = CLAMP_ENABLE_OGL, - .BaseMipLevel = 0.0, - .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], - .MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter, - pCreateInfo->anisotropyEnable), - .MinModeFilter = vk_to_gen_tex_filter(pCreateInfo->minFilter, - pCreateInfo->anisotropyEnable), - .TextureLODBias = pCreateInfo->mipLodBias * 256, - .AnisotropicAlgorithm = EWAApproximation, - .MinLOD = pCreateInfo->minLod, - .MaxLOD = pCreateInfo->maxLod, - .ChromaKeyEnable = 0, - .ChromaKeyIndex = 0, - .ChromaKeyMode = 0, - .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], - .CubeSurfaceControlMode = OVERRIDE, - - .BorderColorPointer = - device->border_colors.offset + - pCreateInfo->borderColor * sizeof(float) * 4, - - .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy), - .RAddressMinFilterRoundingEnable = 0, - .RAddressMagFilterRoundingEnable = 0, - 
.VAddressMinFilterRoundingEnable = 0, - .VAddressMagFilterRoundingEnable = 0, - .UAddressMinFilterRoundingEnable = 0, - .UAddressMagFilterRoundingEnable = 0, - .TrilinearFilterQuality = 0, - .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates, - .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], - .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], - .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], - }; - - GEN7_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); - - *pSampler = anv_sampler_to_handle(sampler); - - return VK_SUCCESS; -} - -static const uint8_t anv_halign[] = { - [4] = HALIGN_4, - [8] = HALIGN_8, -}; - -static const uint8_t anv_valign[] = { - [2] = VALIGN_2, - [4] = VALIGN_4, -}; - -void -genX(fill_image_surface_state)(struct anv_device *device, void *state_map, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage) -{ - if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) - anv_finishme("non-2D image views"); - - assert(usage & (VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)); - assert(util_is_power_of_two(usage)); - - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - bool is_storage = (usage == VK_IMAGE_USAGE_STORAGE_BIT); - struct anv_surface *surface = - anv_image_get_surface_for_aspect_mask(image, range->aspectMask); - - uint32_t depth = 1; - if (range->layerCount > 1) { - depth = range->layerCount; - } else if (image->extent.depth > 1) { - depth = image->extent.depth; - } - - const struct isl_extent3d image_align_sa = - isl_surf_get_image_alignment_sa(&surface->isl); - - struct GENX(RENDER_SURFACE_STATE) template = { - .SurfaceType = anv_surftype(image, pCreateInfo->viewType, - usage == VK_IMAGE_USAGE_STORAGE_BIT), - .SurfaceArray = image->array_size > 1, - .SurfaceFormat = anv_surface_format(device, iview->format, is_storage), - .SurfaceVerticalAlignment = anv_valign[image_align_sa.height], - .SurfaceHorizontalAlignment = anv_halign[image_align_sa.width], - - /* From bspec (DevSNB, DevIVB): "Set Tile Walk to TILEWALK_XMAJOR if - * Tiled Surface is False." - */ - .TiledSurface = surface->isl.tiling != ISL_TILING_LINEAR, - .TileWalk = surface->isl.tiling == ISL_TILING_Y0 ? - TILEWALK_YMAJOR : TILEWALK_XMAJOR, - - .VerticalLineStride = 0, - .VerticalLineStrideOffset = 0, - - .RenderCacheReadWriteMode = 0, /* TEMPLATE */ - - .Height = image->extent.height - 1, - .Width = image->extent.width - 1, - .Depth = depth - 1, - .SurfacePitch = surface->isl.row_pitch - 1, - .MinimumArrayElement = range->baseArrayLayer, - .NumberofMultisamples = MULTISAMPLECOUNT_1, - .XOffset = 0, - .YOffset = 0, - - .SurfaceObjectControlState = GENX(MOCS), - - .MIPCountLOD = 0, /* TEMPLATE */ - .SurfaceMinLOD = 0, /* TEMPLATE */ - - .MCSEnable = false, -# if (ANV_IS_HASWELL) - .ShaderChannelSelectRed = vk_to_gen_swizzle[iview->swizzle.r], - .ShaderChannelSelectGreen = vk_to_gen_swizzle[iview->swizzle.g], - .ShaderChannelSelectBlue = vk_to_gen_swizzle[iview->swizzle.b], - .ShaderChannelSelectAlpha = vk_to_gen_swizzle[iview->swizzle.a], -# else /* XXX: Seriously? 
*/ - .RedClearColor = 0, - .GreenClearColor = 0, - .BlueClearColor = 0, - .AlphaClearColor = 0, -# endif - .ResourceMinLOD = 0.0, - .SurfaceBaseAddress = { NULL, iview->offset }, - }; - - if (usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { - /* For render target surfaces, the hardware interprets field - * MIPCount/LOD as LOD. The Broadwell PRM says: - * - * MIPCountLOD defines the LOD that will be rendered into. - * SurfaceMinLOD is ignored. - */ - template.MIPCountLOD = range->baseMipLevel; - template.SurfaceMinLOD = 0; - } else { - /* For non render target surfaces, the hardware interprets field - * MIPCount/LOD as MIPCount. The range of levels accessible by the - * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. - */ - template.SurfaceMinLOD = range->baseMipLevel; - template.MIPCountLOD = MAX2(range->levelCount, 1) - 1; - } - - GENX(RENDER_SURFACE_STATE_pack)(NULL, state_map, &template); -} diff --git a/src/vulkan/gen8_cmd_buffer.c b/src/vulkan/gen8_cmd_buffer.c deleted file mode 100644 index b741612..0000000 --- a/src/vulkan/gen8_cmd_buffer.c +++ /dev/null @@ -1,914 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -#include "genxml/gen8_pack.h" -#include "genxml/gen9_pack.h" - -static uint32_t -cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer) -{ - static const uint32_t push_constant_opcodes[] = { - [MESA_SHADER_VERTEX] = 21, - [MESA_SHADER_TESS_CTRL] = 25, /* HS */ - [MESA_SHADER_TESS_EVAL] = 26, /* DS */ - [MESA_SHADER_GEOMETRY] = 22, - [MESA_SHADER_FRAGMENT] = 23, - [MESA_SHADER_COMPUTE] = 0, - }; - - VkShaderStageFlags flushed = 0; - - anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) { - if (stage == MESA_SHADER_COMPUTE) - continue; - - struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage); - - if (state.offset == 0) - continue; - - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), - ._3DCommandSubOpcode = push_constant_opcodes[stage], - .ConstantBody = { - .PointerToConstantBuffer2 = { &cmd_buffer->device->dynamic_state_block_pool.bo, state.offset }, - .ConstantBuffer2ReadLength = DIV_ROUND_UP(state.alloc_size, 32), - }); - - flushed |= mesa_to_vk_shader_stage(stage); - } - - cmd_buffer->state.push_constants_dirty &= ~flushed; - - return flushed; -} - -#if ANV_GEN == 8 -static void -emit_viewport_state(struct anv_cmd_buffer *cmd_buffer, - uint32_t count, const VkViewport *viewports) -{ - struct anv_state sf_clip_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64); - struct anv_state cc_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32); - - for (uint32_t i = 0; i < count; i++) { - const VkViewport *vp = &viewports[i]; - - /* The gen7 state struct has just the matrix and guardband fields, the - * gen8 struct adds the min/max viewport fields. */ - struct GENX(SF_CLIP_VIEWPORT) sf_clip_viewport = { - .ViewportMatrixElementm00 = vp->width / 2, - .ViewportMatrixElementm11 = vp->height / 2, - .ViewportMatrixElementm22 = 1.0, - .ViewportMatrixElementm30 = vp->x + vp->width / 2, - .ViewportMatrixElementm31 = vp->y + vp->height / 2, - .ViewportMatrixElementm32 = 0.0, - .XMinClipGuardband = -1.0f, - .XMaxClipGuardband = 1.0f, - .YMinClipGuardband = -1.0f, - .YMaxClipGuardband = 1.0f, - .XMinViewPort = vp->x, - .XMaxViewPort = vp->x + vp->width - 1, - .YMinViewPort = vp->y, - .YMaxViewPort = vp->y + vp->height - 1, - }; - - struct GENX(CC_VIEWPORT) cc_viewport = { - .MinimumDepth = vp->minDepth, - .MaximumDepth = vp->maxDepth - }; - - GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, - &sf_clip_viewport); - GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport); - } - - if (!cmd_buffer->device->info.has_llc) { - anv_state_clflush(sf_clip_state); - anv_state_clflush(cc_state); - } - - anv_batch_emit(&cmd_buffer->batch, - GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), - .CCViewportPointer = cc_state.offset); - anv_batch_emit(&cmd_buffer->batch, - GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), - .SFClipViewportPointer = sf_clip_state.offset); -} - -void -gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer) -{ - if (cmd_buffer->state.dynamic.viewport.count > 0) { - emit_viewport_state(cmd_buffer, cmd_buffer->state.dynamic.viewport.count, - cmd_buffer->state.dynamic.viewport.viewports); - } else { - /* If viewport count is 0, this is taken to mean "use the default" */ - emit_viewport_state(cmd_buffer, 1, - &(VkViewport) { - .x = 0.0f, - .y = 0.0f, - .width = cmd_buffer->state.framebuffer->width, - .height = cmd_buffer->state.framebuffer->height, - .minDepth = 0.0f, - .maxDepth = 1.0f, - }); 
- } -} -#endif - -static void -emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) -{ - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), - .RegisterOffset = reg, - .DataDWord = imm); -} - -#define GEN8_L3CNTLREG 0x7034 - -static void -config_l3(struct anv_cmd_buffer *cmd_buffer, bool enable_slm) -{ - /* References for GL state: - * - * - commits e307cfa..228d5a3 - * - src/mesa/drivers/dri/i965/gen7_l3_state.c - */ - - uint32_t val = enable_slm ? - /* All = 48 ways; URB = 16 ways; DC and RO = 0, SLM = 1 */ - 0x60000021 : - /* All = 48 ways; URB = 48 ways; DC, RO and SLM = 0 */ - 0x60000060; - bool changed = cmd_buffer->state.current_l3_config != val; - - if (changed) { - /* According to the hardware docs, the L3 partitioning can only be changed - * while the pipeline is completely drained and the caches are flushed, - * which involves a first PIPE_CONTROL flush which stalls the pipeline and - * initiates invalidation of the relevant caches... - */ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .TextureCacheInvalidationEnable = true, - .ConstantCacheInvalidationEnable = true, - .InstructionCacheInvalidateEnable = true, - .DCFlushEnable = true, - .PostSyncOperation = NoWrite, - .CommandStreamerStallEnable = true); - - /* ...followed by a second stalling flush which guarantees that - * invalidation is complete when the L3 configuration registers are - * modified. - */ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .DCFlushEnable = true, - .PostSyncOperation = NoWrite, - .CommandStreamerStallEnable = true); - - emit_lri(&cmd_buffer->batch, GEN8_L3CNTLREG, val); - cmd_buffer->state.current_l3_config = val; - } -} - -static void -__emit_genx_sf_state(struct anv_cmd_buffer *cmd_buffer) -{ - uint32_t sf_dw[GENX(3DSTATE_SF_length)]; - struct GENX(3DSTATE_SF) sf = { - GENX(3DSTATE_SF_header), - .LineWidth = cmd_buffer->state.dynamic.line_width, - }; - GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf); - /* FIXME: gen9.fs */ - anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, - cmd_buffer->state.pipeline->gen8.sf); -} -static void -__emit_gen9_sf_state(struct anv_cmd_buffer *cmd_buffer) -{ - uint32_t sf_dw[GENX(3DSTATE_SF_length)]; - struct GEN9_3DSTATE_SF sf = { - GEN9_3DSTATE_SF_header, - .LineWidth = cmd_buffer->state.dynamic.line_width, - }; - GEN9_3DSTATE_SF_pack(NULL, sf_dw, &sf); - /* FIXME: gen9.fs */ - anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, - cmd_buffer->state.pipeline->gen8.sf); -} - -static void -__emit_sf_state(struct anv_cmd_buffer *cmd_buffer) -{ - if (cmd_buffer->device->info.is_cherryview) - __emit_gen9_sf_state(cmd_buffer); - else - __emit_genx_sf_state(cmd_buffer); -} - -void -genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - uint32_t *p; - - uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used; - - assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); - - config_l3(cmd_buffer, false); - - genX(flush_pipeline_select_3d)(cmd_buffer); - - if (vb_emit) { - const uint32_t num_buffers = __builtin_popcount(vb_emit); - const uint32_t num_dwords = 1 + num_buffers * 4; - - p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, - GENX(3DSTATE_VERTEX_BUFFERS)); - uint32_t vb, i = 0; - for_each_bit(vb, vb_emit) { - struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; - uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; - - struct GENX(VERTEX_BUFFER_STATE) state = { - .VertexBufferIndex = vb, - .MemoryObjectControlState = 
GENX(MOCS), - .AddressModifyEnable = true, - .BufferPitch = pipeline->binding_stride[vb], - .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, - .BufferSize = buffer->size - offset - }; - - GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, &p[1 + i * 4], &state); - i++; - } - } - - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { - /* If somebody compiled a pipeline after starting a command buffer the - * scratch bo may have grown since we started this cmd buffer (and - * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, - * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ - if (cmd_buffer->state.scratch_size < pipeline->total_scratch) - anv_cmd_buffer_emit_state_base_address(cmd_buffer); - - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - } - - /* We emit the binding tables and sampler tables first, then emit push - * constants and then finally emit binding table and sampler table - * pointers. It has to happen in this order, since emitting the binding - * tables may change the push constants (in case of storage images). After - * emitting push constants, on SKL+ we have to emit the corresponding - * 3DSTATE_BINDING_TABLE_POINTER_* for the push constants to take effect. - */ - uint32_t dirty = 0; - if (cmd_buffer->state.descriptors_dirty) - dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer); - - if (cmd_buffer->state.push_constants_dirty) - dirty |= cmd_buffer_flush_push_constants(cmd_buffer); - - if (dirty) - gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty); - - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) - gen8_cmd_buffer_emit_viewport(cmd_buffer); - - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) - gen7_cmd_buffer_emit_scissor(cmd_buffer); - - if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | - ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) { - __emit_sf_state(cmd_buffer); - } - - if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | - ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)){ - bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f || - cmd_buffer->state.dynamic.depth_bias.slope != 0.0f; - - uint32_t raster_dw[GENX(3DSTATE_RASTER_length)]; - struct GENX(3DSTATE_RASTER) raster = { - GENX(3DSTATE_RASTER_header), - .GlobalDepthOffsetEnableSolid = enable_bias, - .GlobalDepthOffsetEnableWireframe = enable_bias, - .GlobalDepthOffsetEnablePoint = enable_bias, - .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias, - .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope, - .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp - }; - GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster); - anv_batch_emit_merge(&cmd_buffer->batch, raster_dw, - pipeline->gen8.raster); - } - - /* Stencil reference values moved from COLOR_CALC_STATE in gen8 to - * 3DSTATE_WM_DEPTH_STENCIL in gen9. That means the dirty bits gets split - * across different state packets for gen8 and gen9. We handle that by - * using a big old #if switch here. 
- */ -#if ANV_GEN == 8 - if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS | - ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { - struct anv_state cc_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - GEN8_COLOR_CALC_STATE_length * 4, - 64); - struct GEN8_COLOR_CALC_STATE cc = { - .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], - .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], - .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], - .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], - .StencilReferenceValue = - cmd_buffer->state.dynamic.stencil_reference.front, - .BackFaceStencilReferenceValue = - cmd_buffer->state.dynamic.stencil_reference.back, - }; - GEN8_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); - - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(cc_state); - - anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_CC_STATE_POINTERS, - .ColorCalcStatePointer = cc_state.offset, - .ColorCalcStatePointerValid = true); - } - - if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | - ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | - ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) { - uint32_t wm_depth_stencil_dw[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; - - struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { - GEN8_3DSTATE_WM_DEPTH_STENCIL_header, - - /* Is this what we need to do? */ - .StencilBufferWriteEnable = - cmd_buffer->state.dynamic.stencil_write_mask.front != 0, - - .StencilTestMask = - cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff, - .StencilWriteMask = - cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff, - - .BackfaceStencilTestMask = - cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff, - .BackfaceStencilWriteMask = - cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff, - }; - GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, wm_depth_stencil_dw, - &wm_depth_stencil); - - anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw, - pipeline->gen8.wm_depth_stencil); - } -#else - if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) { - struct anv_state cc_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, - GEN9_COLOR_CALC_STATE_length * 4, - 64); - struct GEN9_COLOR_CALC_STATE cc = { - .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0], - .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1], - .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2], - .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3], - }; - GEN9_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc); - - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(cc_state); - - anv_batch_emit(&cmd_buffer->batch, - GEN9_3DSTATE_CC_STATE_POINTERS, - .ColorCalcStatePointer = cc_state.offset, - .ColorCalcStatePointerValid = true); - } - - if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | - ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK | - ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | - ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) { - uint32_t dwords[GEN9_3DSTATE_WM_DEPTH_STENCIL_length]; - struct anv_dynamic_state *d = &cmd_buffer->state.dynamic; - struct GEN9_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { - GEN9_3DSTATE_WM_DEPTH_STENCIL_header, - - .StencilBufferWriteEnable = d->stencil_write_mask.front != 0 || - d->stencil_write_mask.back != 0, - - .StencilTestMask = d->stencil_compare_mask.front & 0xff, - .StencilWriteMask = d->stencil_write_mask.front & 0xff, - - 
.BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff, - .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff, - - .StencilReferenceValue = d->stencil_reference.front, - .BackfaceStencilReferenceValue = d->stencil_reference.back - }; - GEN9_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, dwords, &wm_depth_stencil); - - anv_batch_emit_merge(&cmd_buffer->batch, dwords, - pipeline->gen9.wm_depth_stencil); - } -#endif - - if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE | - ANV_CMD_DIRTY_INDEX_BUFFER)) { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), - .IndexedDrawCutIndexEnable = pipeline->primitive_restart, - .CutIndex = cmd_buffer->state.restart_index, - ); - } - - cmd_buffer->state.vb_dirty &= ~vb_emit; - cmd_buffer->state.dirty = 0; -} - -void genX(CmdBindIndexBuffer)( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - VkIndexType indexType) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - - static const uint32_t vk_to_gen_index_type[] = { - [VK_INDEX_TYPE_UINT16] = INDEX_WORD, - [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, - }; - - static const uint32_t restart_index_for_type[] = { - [VK_INDEX_TYPE_UINT16] = UINT16_MAX, - [VK_INDEX_TYPE_UINT32] = UINT32_MAX, - }; - - cmd_buffer->state.restart_index = restart_index_for_type[indexType]; - - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), - .IndexFormat = vk_to_gen_index_type[indexType], - .MemoryObjectControlState = GENX(MOCS), - .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, - .BufferSize = buffer->size - offset); - - cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; -} - -static VkResult -flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct anv_state surfaces = { 0, }, samplers = { 0, }; - VkResult result; - - result = anv_cmd_buffer_emit_samplers(cmd_buffer, - MESA_SHADER_COMPUTE, &samplers); - if (result != VK_SUCCESS) - return result; - result = anv_cmd_buffer_emit_binding_table(cmd_buffer, - MESA_SHADER_COMPUTE, &surfaces); - if (result != VK_SUCCESS) - return result; - - struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); - - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; - const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; - - unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; - unsigned push_constant_data_size = - (prog_data->nr_params + local_id_dwords) * 4; - unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); - unsigned push_constant_regs = reg_aligned_constant_size / 32; - - if (push_state.alloc_size) { - anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), - .CURBETotalDataLength = push_state.alloc_size, - .CURBEDataStartAddress = push_state.offset); - } - - assert(prog_data->total_shared <= 64 * 1024); - uint32_t slm_size = 0; - if (prog_data->total_shared > 0) { - /* slm_size is in 4k increments, but must be a power of 2. 
*/ - slm_size = 4 * 1024; - while (slm_size < prog_data->total_shared) - slm_size <<= 1; - slm_size /= 4 * 1024; - } - - struct anv_state state = - anv_state_pool_emit(&device->dynamic_state_pool, - GENX(INTERFACE_DESCRIPTOR_DATA), 64, - .KernelStartPointer = pipeline->cs_simd, - .KernelStartPointerHigh = 0, - .BindingTablePointer = surfaces.offset, - .BindingTableEntryCount = 0, - .SamplerStatePointer = samplers.offset, - .SamplerCount = 0, - .ConstantIndirectURBEntryReadLength = push_constant_regs, - .ConstantURBEntryReadOffset = 0, - .BarrierEnable = cs_prog_data->uses_barrier, - .SharedLocalMemorySize = slm_size, - .NumberofThreadsinGPGPUThreadGroup = - pipeline->cs_thread_width_max); - - uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t); - anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), - .InterfaceDescriptorTotalLength = size, - .InterfaceDescriptorDataStartAddress = state.offset); - - return VK_SUCCESS; -} - -void -genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - VkResult result; - - assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); - - bool needs_slm = pipeline->cs_prog_data.base.total_shared > 0; - config_l3(cmd_buffer, needs_slm); - - if (cmd_buffer->state.current_pipeline != GPGPU) { -#if ANV_GEN < 10 - /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT: - * - * Software must clear the COLOR_CALC_STATE Valid field in - * 3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT - * with Pipeline Select set to GPGPU. - * - * The internal hardware docs recommend the same workaround for Gen9 - * hardware too. - */ - anv_batch_emit(&cmd_buffer->batch, - GENX(3DSTATE_CC_STATE_POINTERS)); -#endif - - anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), -#if ANV_GEN >= 9 - .MaskBits = 3, -#endif - .PipelineSelection = GPGPU); - cmd_buffer->state.current_pipeline = GPGPU; - } - - if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); - - if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || - (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { - result = flush_compute_descriptor_set(cmd_buffer); - assert(result == VK_SUCCESS); - cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; - } - - cmd_buffer->state.compute_dirty = 0; -} - -static void -emit_ps_depth_count(struct anv_batch *batch, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GENX(PIPE_CONTROL), - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WritePSDepthCount, - .DepthStallEnable = true, - .Address = { bo, offset }); -} - -static void -emit_query_availability(struct anv_batch *batch, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GENX(PIPE_CONTROL), - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WriteImmediateData, - .Address = { bo, offset }, - .ImmediateData = 1); -} - -void genX(CmdBeginQuery)( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t query, - VkQueryControlFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - - /* Workaround: When meta uses the pipeline with the VS disabled, it seems - * that the pipelining of the depth write breaks. What we see is that - * samples from the render pass clear leaks into the first query - * immediately after the clear. 
Doing a pipecontrol with a post-sync - * operation and DepthStallEnable seems to work around the issue. - */ - if (cmd_buffer->state.need_query_wa) { - cmd_buffer->state.need_query_wa = false; - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .DepthCacheFlushEnable = true, - .DepthStallEnable = true); - } - - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, - query * sizeof(struct anv_query_pool_slot)); - break; - - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - default: - unreachable(""); - } -} - -void genX(CmdEndQuery)( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t query) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, - query * sizeof(struct anv_query_pool_slot) + 8); - - emit_query_availability(&cmd_buffer->batch, &pool->bo, - query * sizeof(struct anv_query_pool_slot) + 16); - break; - - case VK_QUERY_TYPE_PIPELINE_STATISTICS: - default: - unreachable(""); - } -} - -#define TIMESTAMP 0x2358 - -void genX(CmdWriteTimestamp)( - VkCommandBuffer commandBuffer, - VkPipelineStageFlagBits pipelineStage, - VkQueryPool queryPool, - uint32_t query) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - uint32_t offset = query * sizeof(struct anv_query_pool_slot); - - assert(pool->type == VK_QUERY_TYPE_TIMESTAMP); - - switch (pipelineStage) { - case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT: - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), - .RegisterAddress = TIMESTAMP, - .MemoryAddress = { &pool->bo, offset }); - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), - .RegisterAddress = TIMESTAMP + 4, - .MemoryAddress = { &pool->bo, offset + 4 }); - break; - - default: - /* Everything else is bottom-of-pipe */ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WriteTimestamp, - .Address = { &pool->bo, offset }); - break; - } - - emit_query_availability(&cmd_buffer->batch, &pool->bo, query + 16); -} - -#define alu_opcode(v) __gen_uint((v), 20, 31) -#define alu_operand1(v) __gen_uint((v), 10, 19) -#define alu_operand2(v) __gen_uint((v), 0, 9) -#define alu(opcode, operand1, operand2) \ - alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) - -#define OPCODE_NOOP 0x000 -#define OPCODE_LOAD 0x080 -#define OPCODE_LOADINV 0x480 -#define OPCODE_LOAD0 0x081 -#define OPCODE_LOAD1 0x481 -#define OPCODE_ADD 0x100 -#define OPCODE_SUB 0x101 -#define OPCODE_AND 0x102 -#define OPCODE_OR 0x103 -#define OPCODE_XOR 0x104 -#define OPCODE_STORE 0x180 -#define OPCODE_STOREINV 0x580 - -#define OPERAND_R0 0x00 -#define OPERAND_R1 0x01 -#define OPERAND_R2 0x02 -#define OPERAND_R3 0x03 -#define OPERAND_R4 0x04 -#define OPERAND_SRCA 0x20 -#define OPERAND_SRCB 0x21 -#define OPERAND_ACCU 0x31 -#define OPERAND_ZF 0x32 -#define OPERAND_CF 0x33 - -#define CS_GPR(n) (0x2600 + (n) * 8) - -static void -emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, - struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), - .RegisterAddress = reg + 4, - .MemoryAddress = { bo, offset + 4 }); -} - -static void -store_query_result(struct anv_batch *batch, uint32_t 
reg, - struct anv_bo *bo, uint32_t offset, VkQueryResultFlags flags) -{ - anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); - - if (flags & VK_QUERY_RESULT_64_BIT) - anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), - .RegisterAddress = reg + 4, - .MemoryAddress = { bo, offset + 4 }); -} - -void genX(CmdCopyQueryPoolResults)( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t firstQuery, - uint32_t queryCount, - VkBuffer destBuffer, - VkDeviceSize destOffset, - VkDeviceSize destStride, - VkQueryResultFlags flags) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); - uint32_t slot_offset, dst_offset; - - if (flags & VK_QUERY_RESULT_WAIT_BIT) - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .CommandStreamerStallEnable = true, - .StallAtPixelScoreboard = true); - - dst_offset = buffer->offset + destOffset; - for (uint32_t i = 0; i < queryCount; i++) { - - slot_offset = (firstQuery + i) * sizeof(struct anv_query_pool_slot); - switch (pool->type) { - case VK_QUERY_TYPE_OCCLUSION: - emit_load_alu_reg_u64(&cmd_buffer->batch, - CS_GPR(0), &pool->bo, slot_offset); - emit_load_alu_reg_u64(&cmd_buffer->batch, - CS_GPR(1), &pool->bo, slot_offset + 8); - - /* FIXME: We need to clamp the result for 32 bit. */ - - uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH)); - dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); - dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); - dw[3] = alu(OPCODE_SUB, 0, 0); - dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); - break; - - case VK_QUERY_TYPE_TIMESTAMP: - emit_load_alu_reg_u64(&cmd_buffer->batch, - CS_GPR(2), &pool->bo, slot_offset); - break; - - default: - unreachable("unhandled query type"); - } - - store_query_result(&cmd_buffer->batch, - CS_GPR(2), buffer->bo, dst_offset, flags); - - if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), - &pool->bo, slot_offset + 16); - if (flags & VK_QUERY_RESULT_64_BIT) - store_query_result(&cmd_buffer->batch, - CS_GPR(0), buffer->bo, dst_offset + 8, flags); - else - store_query_result(&cmd_buffer->batch, - CS_GPR(0), buffer->bo, dst_offset + 4, flags); - } - - dst_offset += destStride; - } -} - -void genX(CmdSetEvent)( - VkCommandBuffer commandBuffer, - VkEvent _event, - VkPipelineStageFlags stageMask) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_event, event, _event); - - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WriteImmediateData, - .Address = { - &cmd_buffer->device->dynamic_state_block_pool.bo, - event->state.offset - }, - .ImmediateData = VK_EVENT_SET); -} - -void genX(CmdResetEvent)( - VkCommandBuffer commandBuffer, - VkEvent _event, - VkPipelineStageFlags stageMask) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_event, event, _event); - - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .DestinationAddressType = DAT_PPGTT, - .PostSyncOperation = WriteImmediateData, - .Address = { - &cmd_buffer->device->dynamic_state_block_pool.bo, - event->state.offset - }, - .ImmediateData = VK_EVENT_RESET); -} - -void genX(CmdWaitEvents)( - VkCommandBuffer commandBuffer, - uint32_t eventCount, - const VkEvent* pEvents, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags 
destStageMask, - uint32_t memoryBarrierCount, - const VkMemoryBarrier* pMemoryBarriers, - uint32_t bufferMemoryBarrierCount, - const VkBufferMemoryBarrier* pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier* pImageMemoryBarriers) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - for (uint32_t i = 0; i < eventCount; i++) { - ANV_FROM_HANDLE(anv_event, event, pEvents[i]); - - anv_batch_emit(&cmd_buffer->batch, GENX(MI_SEMAPHORE_WAIT), - .WaitMode = PollingMode, - .CompareOperation = COMPARE_SAD_EQUAL_SDD, - .SemaphoreDataDword = VK_EVENT_SET, - .SemaphoreAddress = { - &cmd_buffer->device->dynamic_state_block_pool.bo, - event->state.offset - }); - } - - genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask, - false, /* byRegion */ - memoryBarrierCount, pMemoryBarriers, - bufferMemoryBarrierCount, pBufferMemoryBarriers, - imageMemoryBarrierCount, pImageMemoryBarriers); -} diff --git a/src/vulkan/gen8_pipeline.c b/src/vulkan/gen8_pipeline.c deleted file mode 100644 index f041156..0000000 --- a/src/vulkan/gen8_pipeline.c +++ /dev/null @@ -1,573 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -#include "genxml/gen8_pack.h" -#include "genxml/gen9_pack.h" - -#include "genX_pipeline_util.h" - -static void -emit_ia_state(struct anv_pipeline *pipeline, - const VkPipelineInputAssemblyStateCreateInfo *info, - const struct anv_graphics_pipeline_create_info *extra) -{ - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_TOPOLOGY), - .PrimitiveTopologyType = pipeline->topology); -} - -static void -emit_rs_state(struct anv_pipeline *pipeline, - const VkPipelineRasterizationStateCreateInfo *info, - const VkPipelineMultisampleStateCreateInfo *ms_info, - const struct anv_graphics_pipeline_create_info *extra) -{ - uint32_t samples = 1; - - if (ms_info) - samples = ms_info->rasterizationSamples; - - struct GENX(3DSTATE_SF) sf = { - GENX(3DSTATE_SF_header), - .ViewportTransformEnable = !(extra && extra->disable_viewport), - .TriangleStripListProvokingVertexSelect = 0, - .LineStripListProvokingVertexSelect = 0, - .TriangleFanProvokingVertexSelect = 0, - .PointWidthSource = pipeline->writes_point_size ? 
Vertex : State, - .PointWidth = 1.0, - }; - - /* FINISHME: VkBool32 rasterizerDiscardEnable; */ - - GENX(3DSTATE_SF_pack)(NULL, pipeline->gen8.sf, &sf); - - struct GENX(3DSTATE_RASTER) raster = { - GENX(3DSTATE_RASTER_header), - - /* For details on 3DSTATE_RASTER multisample state, see the BSpec table - * "Multisample Modes State". - */ - .DXMultisampleRasterizationEnable = samples > 1, - .ForcedSampleCount = FSC_NUMRASTSAMPLES_0, - .ForceMultisampling = false, - - .FrontWinding = vk_to_gen_front_face[info->frontFace], - .CullMode = vk_to_gen_cullmode[info->cullMode], - .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode], - .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode], - .ScissorRectangleEnable = !(extra && extra->disable_scissor), -#if ANV_GEN == 8 - .ViewportZClipTestEnable = true, -#else - /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */ - .ViewportZFarClipTestEnable = true, - .ViewportZNearClipTestEnable = true, -#endif - }; - - GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gen8.raster, &raster); -} - -static void -emit_cb_state(struct anv_pipeline *pipeline, - const VkPipelineColorBlendStateCreateInfo *info, - const VkPipelineMultisampleStateCreateInfo *ms_info) -{ - struct anv_device *device = pipeline->device; - - uint32_t num_dwords = GENX(BLEND_STATE_length); - pipeline->blend_state = - anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); - - struct GENX(BLEND_STATE) blend_state = { - .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable, - .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable, - }; - - for (uint32_t i = 0; i < info->attachmentCount; i++) { - const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[i]; - - if (a->srcColorBlendFactor != a->srcAlphaBlendFactor || - a->dstColorBlendFactor != a->dstAlphaBlendFactor || - a->colorBlendOp != a->alphaBlendOp) { - blend_state.IndependentAlphaBlendEnable = true; - } - - blend_state.Entry[i] = (struct GENX(BLEND_STATE_ENTRY)) { - .LogicOpEnable = info->logicOpEnable, - .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], - .ColorBufferBlendEnable = a->blendEnable, - .PreBlendSourceOnlyClampEnable = false, - .ColorClampRange = COLORCLAMP_RTFORMAT, - .PreBlendColorClampEnable = true, - .PostBlendColorClampEnable = true, - .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor], - .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor], - .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp], - .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor], - .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor], - .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp], - .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT), - .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT), - .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT), - .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT), - }; - - /* Our hardware applies the blend factor prior to the blend function - * regardless of what function is used. Technically, this means the - * hardware can do MORE than GL or Vulkan specify. However, it also - * means that, for MIN and MAX, we have to stomp the blend factor to - * ONE to make it a no-op. 
- */ - if (a->colorBlendOp == VK_BLEND_OP_MIN || - a->colorBlendOp == VK_BLEND_OP_MAX) { - blend_state.Entry[i].SourceBlendFactor = BLENDFACTOR_ONE; - blend_state.Entry[i].DestinationBlendFactor = BLENDFACTOR_ONE; - } - if (a->alphaBlendOp == VK_BLEND_OP_MIN || - a->alphaBlendOp == VK_BLEND_OP_MAX) { - blend_state.Entry[i].SourceAlphaBlendFactor = BLENDFACTOR_ONE; - blend_state.Entry[i].DestinationAlphaBlendFactor = BLENDFACTOR_ONE; - } - } - - for (uint32_t i = info->attachmentCount; i < 8; i++) { - blend_state.Entry[i].WriteDisableAlpha = true; - blend_state.Entry[i].WriteDisableRed = true; - blend_state.Entry[i].WriteDisableGreen = true; - blend_state.Entry[i].WriteDisableBlue = true; - } - - GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state); - if (!device->info.has_llc) - anv_state_clflush(pipeline->blend_state); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), - .BlendStatePointer = pipeline->blend_state.offset, - .BlendStatePointerValid = true); -} - -static void -emit_ds_state(struct anv_pipeline *pipeline, - const VkPipelineDepthStencilStateCreateInfo *info) -{ - uint32_t *dw = ANV_GEN == 8 ? - pipeline->gen8.wm_depth_stencil : pipeline->gen9.wm_depth_stencil; - - if (info == NULL) { - /* We're going to OR this together with the dynamic state. We need - * to make sure it's initialized to something useful. - */ - memset(pipeline->gen8.wm_depth_stencil, 0, - sizeof(pipeline->gen8.wm_depth_stencil)); - memset(pipeline->gen9.wm_depth_stencil, 0, - sizeof(pipeline->gen9.wm_depth_stencil)); - return; - } - - /* VkBool32 depthBoundsTestEnable; // optional (depth_bounds_test) */ - - struct GENX(3DSTATE_WM_DEPTH_STENCIL) wm_depth_stencil = { - .DepthTestEnable = info->depthTestEnable, - .DepthBufferWriteEnable = info->depthWriteEnable, - .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], - .DoubleSidedStencilEnable = true, - - .StencilTestEnable = info->stencilTestEnable, - .StencilFailOp = vk_to_gen_stencil_op[info->front.failOp], - .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp], - .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp], - .StencilTestFunction = vk_to_gen_compare_op[info->front.compareOp], - .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.failOp], - .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.passOp], - .BackfaceStencilPassDepthFailOp =vk_to_gen_stencil_op[info->back.depthFailOp], - .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp], - }; - - GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dw, &wm_depth_stencil); -} - -static void -emit_ms_state(struct anv_pipeline *pipeline, - const VkPipelineMultisampleStateCreateInfo *info) -{ - uint32_t samples = 1; - uint32_t log2_samples = 0; - - /* From the Vulkan 1.0 spec: - * If pSampleMask is NULL, it is treated as if the mask has all bits - * enabled, i.e. no coverage is removed from fragments. - * - * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits. - */ - uint32_t sample_mask = 0xffff; - - if (info) { - samples = info->rasterizationSamples; - log2_samples = __builtin_ffs(samples) - 1; - } - - if (info && info->pSampleMask) - sample_mask &= info->pSampleMask[0]; - - if (info && info->sampleShadingEnable) - anv_finishme("VkPipelineMultisampleStateCreateInfo::sampleShadingEnable"); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), - - /* The PRM says that this bit is valid only for DX9: - * - * SW can choose to set this bit only for DX9 API. 
DX10/OGL API's - * should not have any effect by setting or not setting this bit. - */ - .PixelPositionOffsetEnable = false, - - .PixelLocation = CENTER, - .NumberofMultisamples = log2_samples); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), - .SampleMask = sample_mask); -} - -VkResult -genX(graphics_pipeline_create)( - VkDevice _device, - struct anv_pipeline_cache * cache, - const VkGraphicsPipelineCreateInfo* pCreateInfo, - const struct anv_graphics_pipeline_create_info *extra, - const VkAllocationCallbacks* pAllocator, - VkPipeline* pPipeline) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_pipeline *pipeline; - VkResult result; - uint32_t offset, length; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); - - pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pipeline == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - result = anv_pipeline_init(pipeline, device, cache, - pCreateInfo, extra, pAllocator); - if (result != VK_SUCCESS) { - anv_free2(&device->alloc, pAllocator, pipeline); - return result; - } - - assert(pCreateInfo->pVertexInputState); - emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra); - assert(pCreateInfo->pInputAssemblyState); - emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra); - assert(pCreateInfo->pRasterizationState); - emit_rs_state(pipeline, pCreateInfo->pRasterizationState, - pCreateInfo->pMultisampleState, extra); - emit_ms_state(pipeline, pCreateInfo->pMultisampleState); - emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); - emit_cb_state(pipeline, pCreateInfo->pColorBlendState, - pCreateInfo->pMultisampleState); - - emit_urb_setup(pipeline); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), - .ClipEnable = true, - .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), - .MinimumPointWidth = 0.125, - .MaximumPointWidth = 255.875, - .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), - .StatisticsEnable = true, - .LineEndCapAntialiasingRegionWidth = _05pixels, - .LineAntialiasingRegionWidth = _10pixels, - .EarlyDepthStencilControl = NORMAL, - .ForceThreadDispatchEnable = NORMAL, - .PointRasterizationRule = RASTRULE_UPPER_RIGHT, - .BarycentricInterpolationMode = - pipeline->ps_ksp0 == NO_KERNEL ? 
- 0 : pipeline->wm_prog_data.barycentric_interp_modes); - - const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; - offset = 1; - length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; - - if (pipeline->gs_kernel == NO_KERNEL) - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .Enable = false); - else - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), - .SingleProgramFlow = false, - .KernelStartPointer = pipeline->gs_kernel, - .VectorMaskEnable = false, - .SamplerCount = 0, - .BindingTableEntryCount = 0, - .ExpectedVertexCount = gs_prog_data->vertices_in, - - .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY], - .PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base), - - .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, - .OutputTopology = gs_prog_data->output_topology, - .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, - .IncludeVertexHandles = gs_prog_data->base.include_vue_handles, - .DispatchGRFStartRegisterForURBData = - gs_prog_data->base.base.dispatch_grf_start_reg, - - .MaximumNumberofThreads = device->info.max_gs_threads / 2 - 1, - .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, - .DispatchMode = gs_prog_data->base.dispatch_mode, - .StatisticsEnable = true, - .IncludePrimitiveID = gs_prog_data->include_primitive_id, - .ReorderMode = TRAILING, - .Enable = true, - - .ControlDataFormat = gs_prog_data->control_data_format, - - .StaticOutput = gs_prog_data->static_vertex_count >= 0, - .StaticOutputVertexCount = - gs_prog_data->static_vertex_count >= 0 ? - gs_prog_data->static_vertex_count : 0, - - /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled: - * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v) - * UserClipDistanceCullTestEnableBitmask(v) - */ - - .VertexURBEntryOutputReadOffset = offset, - .VertexURBEntryOutputLength = length); - - const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; - /* Skip the VUE header and position slots */ - offset = 1; - length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset; - - uint32_t vs_start = pipeline->vs_simd8 != NO_KERNEL ? pipeline->vs_simd8 : - pipeline->vs_vec4; - - if (vs_start == NO_KERNEL || (extra && extra->disable_vs)) - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), - .FunctionEnable = false, - /* Even if VS is disabled, SBE still gets the amount of - * vertex data to read from this field. 
*/ - .VertexURBEntryOutputReadOffset = offset, - .VertexURBEntryOutputLength = length); - else - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), - .KernelStartPointer = vs_start, - .SingleVertexDispatch = false, - .VectorMaskEnable = false, - .SamplerCount = 0, - .BindingTableEntryCount = - vue_prog_data->base.binding_table.size_bytes / 4, - .ThreadDispatchPriority = false, - .FloatingPointMode = IEEE754, - .IllegalOpcodeExceptionEnable = false, - .AccessesUAV = false, - .SoftwareExceptionEnable = false, - - .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_VERTEX], - .PerThreadScratchSpace = scratch_space(&vue_prog_data->base), - - .DispatchGRFStartRegisterForURBData = - vue_prog_data->base.dispatch_grf_start_reg, - .VertexURBEntryReadLength = vue_prog_data->urb_read_length, - .VertexURBEntryReadOffset = 0, - - .MaximumNumberofThreads = device->info.max_vs_threads - 1, - .StatisticsEnable = false, - .SIMD8DispatchEnable = pipeline->vs_simd8 != NO_KERNEL, - .VertexCacheDisable = false, - .FunctionEnable = true, - - .VertexURBEntryOutputReadOffset = offset, - .VertexURBEntryOutputLength = length, - .UserClipDistanceClipTestEnableBitmask = 0, - .UserClipDistanceCullTestEnableBitmask = 0); - - const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; - - const int num_thread_bias = ANV_GEN == 8 ? 2 : 1; - if (pipeline->ps_ksp0 == NO_KERNEL) { - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), - .PixelShaderValid = false); - } else { - /* TODO: We should clean this up. Among other things, this is mostly - * shared with other gens. - */ - const struct brw_vue_map *fs_input_map; - if (pipeline->gs_kernel == NO_KERNEL) - fs_input_map = &vue_prog_data->vue_map; - else - fs_input_map = &gs_prog_data->base.vue_map; - - struct GENX(3DSTATE_SBE_SWIZ) swiz = { - GENX(3DSTATE_SBE_SWIZ_header), - }; - - int max_source_attr = 0; - for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { - int input_index = wm_prog_data->urb_setup[attr]; - - if (input_index < 0) - continue; - - int source_attr = fs_input_map->varying_to_slot[attr]; - max_source_attr = MAX2(max_source_attr, source_attr); - - if (input_index >= 16) - continue; - - if (source_attr == -1) { - /* This attribute does not exist in the VUE--that means that the - * vertex shader did not write to it. It could be that it's a - * regular varying read by the fragment shader but not written by - * the vertex shader or it's gl_PrimitiveID. In the first case the - * value is undefined, in the second it needs to be - * gl_PrimitiveID. - */ - swiz.Attribute[input_index].ConstantSource = PRIM_ID; - swiz.Attribute[input_index].ComponentOverrideX = true; - swiz.Attribute[input_index].ComponentOverrideY = true; - swiz.Attribute[input_index].ComponentOverrideZ = true; - swiz.Attribute[input_index].ComponentOverrideW = true; - } else { - /* We have to subtract two slots to accout for the URB entry output - * read offset in the VS and GS stages. 
- */ - swiz.Attribute[input_index].SourceAttribute = source_attr - 2; - } - } - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), - .AttributeSwizzleEnable = true, - .ForceVertexURBEntryReadLength = false, - .ForceVertexURBEntryReadOffset = false, - .VertexURBEntryReadLength = - DIV_ROUND_UP(max_source_attr + 1, 2), - .PointSpriteTextureCoordinateOrigin = UPPERLEFT, - .NumberofSFOutputAttributes = - wm_prog_data->num_varying_inputs, - -#if ANV_GEN >= 9 - .Attribute0ActiveComponentFormat = ACF_XYZW, - .Attribute1ActiveComponentFormat = ACF_XYZW, - .Attribute2ActiveComponentFormat = ACF_XYZW, - .Attribute3ActiveComponentFormat = ACF_XYZW, - .Attribute4ActiveComponentFormat = ACF_XYZW, - .Attribute5ActiveComponentFormat = ACF_XYZW, - .Attribute6ActiveComponentFormat = ACF_XYZW, - .Attribute7ActiveComponentFormat = ACF_XYZW, - .Attribute8ActiveComponentFormat = ACF_XYZW, - .Attribute9ActiveComponentFormat = ACF_XYZW, - .Attribute10ActiveComponentFormat = ACF_XYZW, - .Attribute11ActiveComponentFormat = ACF_XYZW, - .Attribute12ActiveComponentFormat = ACF_XYZW, - .Attribute13ActiveComponentFormat = ACF_XYZW, - .Attribute14ActiveComponentFormat = ACF_XYZW, - .Attribute15ActiveComponentFormat = ACF_XYZW, - /* wow, much field, very attribute */ - .Attribute16ActiveComponentFormat = ACF_XYZW, - .Attribute17ActiveComponentFormat = ACF_XYZW, - .Attribute18ActiveComponentFormat = ACF_XYZW, - .Attribute19ActiveComponentFormat = ACF_XYZW, - .Attribute20ActiveComponentFormat = ACF_XYZW, - .Attribute21ActiveComponentFormat = ACF_XYZW, - .Attribute22ActiveComponentFormat = ACF_XYZW, - .Attribute23ActiveComponentFormat = ACF_XYZW, - .Attribute24ActiveComponentFormat = ACF_XYZW, - .Attribute25ActiveComponentFormat = ACF_XYZW, - .Attribute26ActiveComponentFormat = ACF_XYZW, - .Attribute27ActiveComponentFormat = ACF_XYZW, - .Attribute28ActiveComponentFormat = ACF_XYZW, - .Attribute29ActiveComponentFormat = ACF_XYZW, - .Attribute28ActiveComponentFormat = ACF_XYZW, - .Attribute29ActiveComponentFormat = ACF_XYZW, - .Attribute30ActiveComponentFormat = ACF_XYZW, -#endif - ); - - uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch, - GENX(3DSTATE_SBE_SWIZ_length)); - GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), - .KernelStartPointer0 = pipeline->ps_ksp0, - - .SingleProgramFlow = false, - .VectorMaskEnable = true, - .SamplerCount = 1, - - .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT], - .PerThreadScratchSpace = scratch_space(&wm_prog_data->base), - - .MaximumNumberofThreadsPerPSD = 64 - num_thread_bias, - .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? 
- POSOFFSET_SAMPLE: POSOFFSET_NONE, - .PushConstantEnable = wm_prog_data->base.nr_params > 0, - ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, - ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, - ._32PixelDispatchEnable = false, - - .DispatchGRFStartRegisterForConstantSetupData0 = pipeline->ps_grf_start0, - .DispatchGRFStartRegisterForConstantSetupData1 = 0, - .DispatchGRFStartRegisterForConstantSetupData2 = pipeline->ps_grf_start2, - - .KernelStartPointer1 = 0, - .KernelStartPointer2 = pipeline->ps_ksp2); - - bool per_sample_ps = pCreateInfo->pMultisampleState && - pCreateInfo->pMultisampleState->sampleShadingEnable; - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), - .PixelShaderValid = true, - .PixelShaderKillsPixel = wm_prog_data->uses_kill, - .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, - .AttributeEnable = wm_prog_data->num_varying_inputs > 0, - .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, - .PixelShaderIsPerSample = per_sample_ps, - .PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth, - .PixelShaderUsesSourceW = wm_prog_data->uses_src_w, -#if ANV_GEN >= 9 - .PixelShaderPullsBary = wm_prog_data->pulls_bary, - .InputCoverageMaskState = wm_prog_data->uses_sample_mask ? - ICMS_INNER_CONSERVATIVE : ICMS_NONE, -#else - .PixelShaderUsesInputCoverageMask = - wm_prog_data->uses_sample_mask, -#endif - ); - } - - *pPipeline = anv_pipeline_to_handle(pipeline); - - return VK_SUCCESS; -} diff --git a/src/vulkan/gen8_state.c b/src/vulkan/gen8_state.c deleted file mode 100644 index 04cfff5..0000000 --- a/src/vulkan/gen8_state.c +++ /dev/null @@ -1,493 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include -#include -#include -#include -#include - -#include "anv_private.h" - -#include "genxml/gen8_pack.h" -#include "genxml/gen9_pack.h" - -#include "genX_state_util.h" - -VkResult -genX(init_device_state)(struct anv_device *device) -{ - struct anv_batch batch; - - uint32_t cmds[64]; - batch.start = batch.next = cmds; - batch.end = (void *) cmds + sizeof(cmds); - - anv_batch_emit(&batch, GENX(PIPELINE_SELECT), -#if ANV_GEN >= 9 - .MaskBits = 3, -#endif - .PipelineSelection = _3D); - - anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS), - .StatisticsEnable = true); - anv_batch_emit(&batch, GENX(3DSTATE_HS), .Enable = false); - anv_batch_emit(&batch, GENX(3DSTATE_TE), .TEEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_DS), .FunctionEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY), - .ChromaKeyKillEnable = false); - anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS)); - - /* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and - * VkPhysicalDeviceFeatures::standardSampleLocations. - */ - anv_batch_emit(&batch, GENX(3DSTATE_SAMPLE_PATTERN), - ._1xSample0XOffset = 0.5, - ._1xSample0YOffset = 0.5, - ._2xSample0XOffset = 0.25, - ._2xSample0YOffset = 0.25, - ._2xSample1XOffset = 0.75, - ._2xSample1YOffset = 0.75, - ._4xSample0XOffset = 0.375, - ._4xSample0YOffset = 0.125, - ._4xSample1XOffset = 0.875, - ._4xSample1YOffset = 0.375, - ._4xSample2XOffset = 0.125, - ._4xSample2YOffset = 0.625, - ._4xSample3XOffset = 0.625, - ._4xSample3YOffset = 0.875, - ._8xSample0XOffset = 0.5625, - ._8xSample0YOffset = 0.3125, - ._8xSample1XOffset = 0.4375, - ._8xSample1YOffset = 0.6875, - ._8xSample2XOffset = 0.8125, - ._8xSample2YOffset = 0.5625, - ._8xSample3XOffset = 0.3125, - ._8xSample3YOffset = 0.1875, - ._8xSample4XOffset = 0.1875, - ._8xSample4YOffset = 0.8125, - ._8xSample5XOffset = 0.0625, - ._8xSample5YOffset = 0.4375, - ._8xSample6XOffset = 0.6875, - ._8xSample6YOffset = 0.9375, - ._8xSample7XOffset = 0.9375, - ._8xSample7YOffset = 0.0625, -#if ANV_GEN >= 9 - ._16xSample0XOffset = 0.5625, - ._16xSample0YOffset = 0.5625, - ._16xSample1XOffset = 0.4375, - ._16xSample1YOffset = 0.3125, - ._16xSample2XOffset = 0.3125, - ._16xSample2YOffset = 0.6250, - ._16xSample3XOffset = 0.7500, - ._16xSample3YOffset = 0.4375, - ._16xSample4XOffset = 0.1875, - ._16xSample4YOffset = 0.3750, - ._16xSample5XOffset = 0.6250, - ._16xSample5YOffset = 0.8125, - ._16xSample6XOffset = 0.8125, - ._16xSample6YOffset = 0.6875, - ._16xSample7XOffset = 0.6875, - ._16xSample7YOffset = 0.1875, - ._16xSample8XOffset = 0.3750, - ._16xSample8YOffset = 0.8750, - ._16xSample9XOffset = 0.5000, - ._16xSample9YOffset = 0.0625, - ._16xSample10XOffset = 0.2500, - ._16xSample10YOffset = 0.1250, - ._16xSample11XOffset = 0.1250, - ._16xSample11YOffset = 0.7500, - ._16xSample12XOffset = 0.0000, - ._16xSample12YOffset = 0.5000, - ._16xSample13XOffset = 0.9375, - ._16xSample13YOffset = 0.2500, - ._16xSample14XOffset = 0.8750, - ._16xSample14YOffset = 0.9375, - ._16xSample15XOffset = 0.0625, - ._16xSample15YOffset = 0.0000, -#endif - ); - - anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END)); - - assert(batch.next <= batch.end); - - return anv_device_submit_simple_batch(device, &batch); -} - -static const uint32_t -isl_to_gen_multisample_layout[] = { - [ISL_MSAA_LAYOUT_NONE] = MSS, - [ISL_MSAA_LAYOUT_INTERLEAVED] = DEPTH_STENCIL, - [ISL_MSAA_LAYOUT_ARRAY] = MSS, -}; - -void -genX(fill_buffer_surface_state)(void *state, 
enum isl_format format, - uint32_t offset, uint32_t range, uint32_t stride) -{ - uint32_t num_elements = range / stride; - - struct GENX(RENDER_SURFACE_STATE) surface_state = { - .SurfaceType = SURFTYPE_BUFFER, - .SurfaceArray = false, - .SurfaceFormat = format, - .SurfaceVerticalAlignment = VALIGN4, - .SurfaceHorizontalAlignment = HALIGN4, - .TileMode = LINEAR, - .SamplerL2BypassModeDisable = true, - .RenderCacheReadWriteMode = WriteOnlyCache, - .MemoryObjectControlState = GENX(MOCS), - .Height = ((num_elements - 1) >> 7) & 0x3fff, - .Width = (num_elements - 1) & 0x7f, - .Depth = ((num_elements - 1) >> 21) & 0x3f, - .SurfacePitch = stride - 1, - .NumberofMultisamples = MULTISAMPLECOUNT_1, - .ShaderChannelSelectRed = SCS_RED, - .ShaderChannelSelectGreen = SCS_GREEN, - .ShaderChannelSelectBlue = SCS_BLUE, - .ShaderChannelSelectAlpha = SCS_ALPHA, - /* FIXME: We assume that the image must be bound at this time. */ - .SurfaceBaseAddress = { NULL, offset }, - }; - - GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state); -} - -static const uint8_t anv_halign[] = { - [4] = HALIGN4, - [8] = HALIGN8, - [16] = HALIGN16, -}; - -static const uint8_t anv_valign[] = { - [4] = VALIGN4, - [8] = VALIGN8, - [16] = VALIGN16, -}; - -/** - * Get the values to pack into RENDER_SUFFACE_STATE.SurfaceHorizontalAlignment - * and SurfaceVerticalAlignment. - */ -static void -get_halign_valign(const struct isl_surf *surf, uint32_t *halign, uint32_t *valign) -{ - #if ANV_GENx10 >= 90 - if (isl_tiling_is_std_y(surf->tiling) || - surf->dim_layout == ISL_DIM_LAYOUT_GEN9_1D) { - /* The hardware ignores the alignment values. Anyway, the surface's - * true alignment is likely outside the enum range of HALIGN* and - * VALIGN*. - */ - *halign = 0; - *valign = 0; - } else { - /* In Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in units - * of surface elements (not pixels nor samples). For compressed formats, - * a "surface element" is defined as a compression block. For example, - * if SurfaceVerticalAlignment is VALIGN_4 and SurfaceFormat is an ETC2 - * format (ETC2 has a block height of 4), then the vertical alignment is - * 4 compression blocks or, equivalently, 16 pixels. - */ - struct isl_extent3d image_align_el - = isl_surf_get_image_alignment_el(surf); - - *halign = anv_halign[image_align_el.width]; - *valign = anv_valign[image_align_el.height]; - } - #else - /* Pre-Skylake, RENDER_SUFFACE_STATE.SurfaceVerticalAlignment is in - * units of surface samples. For example, if SurfaceVerticalAlignment - * is VALIGN_4 and the surface is singlesampled, then for any surface - * format (compressed or not) the vertical alignment is - * 4 pixels. - */ - struct isl_extent3d image_align_sa - = isl_surf_get_image_alignment_sa(surf); - - *halign = anv_halign[image_align_sa.width]; - *valign = anv_valign[image_align_sa.height]; - #endif -} - -static uint32_t -get_qpitch(const struct isl_surf *surf) -{ - switch (surf->dim) { - default: - unreachable(!"bad isl_surf_dim"); - case ISL_SURF_DIM_1D: - #if ANV_GENx10 >= 90 - /* QPitch is usually expressed as rows of surface elements (where - * a surface element is an compression block or a single surface - * sample). Skylake 1D is an outlier. - * - * From the Skylake BSpec >> Memory Views >> Common Surface - * Formats >> Surface Layout and Tiling >> 1D Surfaces: - * - * Surface QPitch specifies the distance in pixels between array - * slices. 
- */ - return isl_surf_get_array_pitch_el(surf); - #else - return isl_surf_get_array_pitch_el_rows(surf); - #endif - case ISL_SURF_DIM_2D: - case ISL_SURF_DIM_3D: - #if ANV_GEN >= 9 - return isl_surf_get_array_pitch_el_rows(surf); - #else - /* From the Broadwell PRM for RENDER_SURFACE_STATE.QPitch - * - * "This field must be set to an integer multiple of the Surface - * Vertical Alignment. For compressed textures (BC*, FXT1, - * ETC*, and EAC* Surface Formats), this field is in units of - * rows in the uncompressed surface, and must be set to an - * integer multiple of the vertical alignment parameter "j" - * defined in the Common Surface Formats section." - */ - return isl_surf_get_array_pitch_sa_rows(surf); - #endif - } -} - -void -genX(fill_image_surface_state)(struct anv_device *device, void *state_map, - struct anv_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo, - VkImageUsageFlagBits usage) -{ - assert(usage & (VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)); - assert(util_is_power_of_two(usage)); - - ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); - const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - bool is_storage = (usage == VK_IMAGE_USAGE_STORAGE_BIT); - struct anv_surface *surface = - anv_image_get_surface_for_aspect_mask(image, range->aspectMask); - - static const uint8_t isl_to_gen_tiling[] = { - [ISL_TILING_LINEAR] = LINEAR, - [ISL_TILING_X] = XMAJOR, - [ISL_TILING_Y0] = YMAJOR, - [ISL_TILING_Yf] = YMAJOR, - [ISL_TILING_Ys] = YMAJOR, - [ISL_TILING_W] = WMAJOR, - }; - - uint32_t halign, valign; - get_halign_valign(&surface->isl, &halign, &valign); - - struct GENX(RENDER_SURFACE_STATE) template = { - .SurfaceType = anv_surftype(image, pCreateInfo->viewType, is_storage), - .SurfaceArray = image->array_size > 1, - .SurfaceFormat = anv_surface_format(device, iview->format, is_storage), - .SurfaceVerticalAlignment = valign, - .SurfaceHorizontalAlignment = halign, - .TileMode = isl_to_gen_tiling[surface->isl.tiling], - .VerticalLineStride = 0, - .VerticalLineStrideOffset = 0, - .SamplerL2BypassModeDisable = true, - .RenderCacheReadWriteMode = WriteOnlyCache, - .CubeFaceEnablePositiveZ = 1, - .CubeFaceEnableNegativeZ = 1, - .CubeFaceEnablePositiveY = 1, - .CubeFaceEnableNegativeY = 1, - .CubeFaceEnablePositiveX = 1, - .CubeFaceEnableNegativeX = 1, - .MemoryObjectControlState = GENX(MOCS), - - /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in - * RENDER_SURFACE_STATE. The Broadwell PRM says "it is illegal to have - * both Base Mip Level fields nonzero". 
- */ - .BaseMipLevel = 0.0, - - .SurfaceQPitch = get_qpitch(&surface->isl) >> 2, - .Height = iview->level_0_extent.height - 1, - .Width = iview->level_0_extent.width - 1, - .Depth = 0, /* TEMPLATE */ - .SurfacePitch = surface->isl.row_pitch - 1, - .RenderTargetViewExtent = 0, /* TEMPLATE */ - .MinimumArrayElement = 0, /* TEMPLATE */ - .MultisampledSurfaceStorageFormat = - isl_to_gen_multisample_layout[surface->isl.msaa_layout], - .NumberofMultisamples = ffs(surface->isl.samples) - 1, - .MultisamplePositionPaletteIndex = 0, /* UNUSED */ - .XOffset = 0, - .YOffset = 0, - - .MIPCountLOD = 0, /* TEMPLATE */ - .SurfaceMinLOD = 0, /* TEMPLATE */ - - .AuxiliarySurfaceMode = AUX_NONE, - .RedClearColor = 0, - .GreenClearColor = 0, - .BlueClearColor = 0, - .AlphaClearColor = 0, - .ShaderChannelSelectRed = vk_to_gen_swizzle[iview->swizzle.r], - .ShaderChannelSelectGreen = vk_to_gen_swizzle[iview->swizzle.g], - .ShaderChannelSelectBlue = vk_to_gen_swizzle[iview->swizzle.b], - .ShaderChannelSelectAlpha = vk_to_gen_swizzle[iview->swizzle.a], - .ResourceMinLOD = 0.0, - .SurfaceBaseAddress = { NULL, iview->offset }, - }; - - switch (template.SurfaceType) { - case SURFTYPE_1D: - case SURFTYPE_2D: - template.MinimumArrayElement = range->baseArrayLayer; - - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: - * - * For SURFTYPE_1D, 2D, and CUBE: The range of this field is reduced - * by one for each increase from zero of Minimum Array Element. For - * example, if Minimum Array Element is set to 1024 on a 2D surface, - * the range of this field is reduced to [0,1023]. - * - * In other words, 'Depth' is the number of array layers. - */ - template.Depth = range->layerCount - 1; - - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: - * - * For Render Target and Typed Dataport 1D and 2D Surfaces: - * This field must be set to the same value as the Depth field. - */ - template.RenderTargetViewExtent = template.Depth; - break; - case SURFTYPE_CUBE: - template.MinimumArrayElement = range->baseArrayLayer; - /* Same as SURFTYPE_2D, but divided by 6 */ - template.Depth = range->layerCount / 6 - 1; - template.RenderTargetViewExtent = template.Depth; - break; - case SURFTYPE_3D: - template.MinimumArrayElement = range->baseArrayLayer; - - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: - * - * If the volume texture is MIP-mapped, this field specifies the - * depth of the base MIP level. - */ - template.Depth = image->extent.depth - 1; - - /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: - * - * For Render Target and Typed Dataport 3D Surfaces: This field - * indicates the extent of the accessible 'R' coordinates minus 1 on - * the LOD currently being rendered to. - */ - template.RenderTargetViewExtent = iview->extent.depth - 1; - break; - default: - unreachable(!"bad SurfaceType"); - } - - if (usage == VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { - /* For render target surfaces, the hardware interprets field - * MIPCount/LOD as LOD. The Broadwell PRM says: - * - * MIPCountLOD defines the LOD that will be rendered into. - * SurfaceMinLOD is ignored. - */ - template.MIPCountLOD = range->baseMipLevel; - template.SurfaceMinLOD = 0; - } else { - /* For non render target surfaces, the hardware interprets field - * MIPCount/LOD as MIPCount. The range of levels accessible by the - * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. 
- */ - template.SurfaceMinLOD = range->baseMipLevel; - template.MIPCountLOD = MAX2(range->levelCount, 1) - 1; - } - - GENX(RENDER_SURFACE_STATE_pack)(NULL, state_map, &template); -} - -VkResult genX(CreateSampler)( - VkDevice _device, - const VkSamplerCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSampler* pSampler) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_sampler *sampler; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); - - sampler = anv_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!sampler) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - uint32_t border_color_offset = device->border_colors.offset + - pCreateInfo->borderColor * 64; - - struct GENX(SAMPLER_STATE) sampler_state = { - .SamplerDisable = false, - .TextureBorderColorMode = DX10OGL, - .LODPreClampMode = CLAMP_MODE_OGL, -#if ANV_GEN == 8 - .BaseMipLevel = 0.0, -#endif - .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode], - .MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter, pCreateInfo->anisotropyEnable), - .MinModeFilter = vk_to_gen_tex_filter(pCreateInfo->minFilter, pCreateInfo->anisotropyEnable), - .TextureLODBias = anv_clamp_f(pCreateInfo->mipLodBias, -16, 15.996), - .AnisotropicAlgorithm = EWAApproximation, - .MinLOD = anv_clamp_f(pCreateInfo->minLod, 0, 14), - .MaxLOD = anv_clamp_f(pCreateInfo->maxLod, 0, 14), - .ChromaKeyEnable = 0, - .ChromaKeyIndex = 0, - .ChromaKeyMode = 0, - .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], - .CubeSurfaceControlMode = OVERRIDE, - - .IndirectStatePointer = border_color_offset >> 6, - - .LODClampMagnificationMode = MIPNONE, - .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy), - .RAddressMinFilterRoundingEnable = 0, - .RAddressMagFilterRoundingEnable = 0, - .VAddressMinFilterRoundingEnable = 0, - .VAddressMagFilterRoundingEnable = 0, - .UAddressMinFilterRoundingEnable = 0, - .UAddressMagFilterRoundingEnable = 0, - .TrilinearFilterQuality = 0, - .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates, - .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], - .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], - .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], - }; - - GENX(SAMPLER_STATE_pack)(NULL, sampler->state, &sampler_state); - - *pSampler = anv_sampler_to_handle(sampler); - - return VK_SUCCESS; -} diff --git a/src/vulkan/genX_cmd_buffer.c b/src/vulkan/genX_cmd_buffer.c deleted file mode 100644 index 5498d1d..0000000 --- a/src/vulkan/genX_cmd_buffer.c +++ /dev/null @@ -1,717 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include - -#include "anv_private.h" - -#if (ANV_GEN == 9) -# include "genxml/gen9_pack.h" -#elif (ANV_GEN == 8) -# include "genxml/gen8_pack.h" -#elif (ANV_IS_HASWELL) -# include "genxml/gen75_pack.h" -#elif (ANV_GEN == 7) -# include "genxml/gen7_pack.h" -#endif - -void -genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - struct anv_bo *scratch_bo = NULL; - - cmd_buffer->state.scratch_size = - anv_block_pool_size(&device->scratch_block_pool); - if (cmd_buffer->state.scratch_size > 0) - scratch_bo = &device->scratch_block_pool.bo; - -/* XXX: Do we need this on more than just BDW? */ -#if (ANV_GEN >= 8) - /* Emit a render target cache flush. - * - * This isn't documented anywhere in the PRM. However, it seems to be - * necessary prior to changing the surface state base adress. Without - * this, we get GPU hangs when using multi-level command buffers which - * clear depth, reset state base address, and then go render stuff. - */ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .RenderTargetCacheFlushEnable = true); -#endif - - anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS), - .GeneralStateBaseAddress = { scratch_bo, 0 }, - .GeneralStateMemoryObjectControlState = GENX(MOCS), - .GeneralStateBaseAddressModifyEnable = true, - - .SurfaceStateBaseAddress = anv_cmd_buffer_surface_base_address(cmd_buffer), - .SurfaceStateMemoryObjectControlState = GENX(MOCS), - .SurfaceStateBaseAddressModifyEnable = true, - - .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, - .DynamicStateMemoryObjectControlState = GENX(MOCS), - .DynamicStateBaseAddressModifyEnable = true, - - .IndirectObjectBaseAddress = { NULL, 0 }, - .IndirectObjectMemoryObjectControlState = GENX(MOCS), - .IndirectObjectBaseAddressModifyEnable = true, - - .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, - .InstructionMemoryObjectControlState = GENX(MOCS), - .InstructionBaseAddressModifyEnable = true, - -# if (ANV_GEN >= 8) - /* Broadwell requires that we specify a buffer size for a bunch of - * these fields. However, since we will be growing the BO's live, we - * just set them all to the maximum. - */ - .GeneralStateBufferSize = 0xfffff, - .GeneralStateBufferSizeModifyEnable = true, - .DynamicStateBufferSize = 0xfffff, - .DynamicStateBufferSizeModifyEnable = true, - .IndirectObjectBufferSize = 0xfffff, - .IndirectObjectBufferSizeModifyEnable = true, - .InstructionBufferSize = 0xfffff, - .InstructionBuffersizeModifyEnable = true, -# endif - ); - - /* After re-setting the surface state base address, we have to do some - * cache flusing so that the sampler engine will pick up the new - * SURFACE_STATE objects and binding tables. From the Broadwell PRM, - * Shared Function > 3D Sampler > State > State Caching (page 96): - * - * Coherency with system memory in the state cache, like the texture - * cache is handled partially by software. 
It is expected that the - * command stream or shader will issue Cache Flush operation or - * Cache_Flush sampler message to ensure that the L1 cache remains - * coherent with system memory. - * - * [...] - * - * Whenever the value of the Dynamic_State_Base_Addr, - * Surface_State_Base_Addr are altered, the L1 state cache must be - * invalidated to ensure the new surface or sampler state is fetched - * from system memory. - * - * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit - * which, according the PIPE_CONTROL instruction documentation in the - * Broadwell PRM: - * - * Setting this bit is independent of any other bit in this packet. - * This bit controls the invalidation of the L1 and L2 state caches - * at the top of the pipe i.e. at the parsing time. - * - * Unfortunately, experimentation seems to indicate that state cache - * invalidation through a PIPE_CONTROL does nothing whatsoever in - * regards to surface state and binding tables. In stead, it seems that - * invalidating the texture cache is what is actually needed. - * - * XXX: As far as we have been able to determine through - * experimentation, shows that flush the texture cache appears to be - * sufficient. The theory here is that all of the sampling/rendering - * units cache the binding table in the texture cache. However, we have - * yet to be able to actually confirm this. - */ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), - .TextureCacheInvalidationEnable = true); -} - -void genX(CmdPipelineBarrier)( - VkCommandBuffer commandBuffer, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - VkBool32 byRegion, - uint32_t memoryBarrierCount, - const VkMemoryBarrier* pMemoryBarriers, - uint32_t bufferMemoryBarrierCount, - const VkBufferMemoryBarrier* pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier* pImageMemoryBarriers) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - uint32_t b, *dw; - - /* XXX: Right now, we're really dumb and just flush whatever categories - * the app asks for. One of these days we may make this a bit better - * but right now that's all the hardware allows for in most areas. - */ - VkAccessFlags src_flags = 0; - VkAccessFlags dst_flags = 0; - - for (uint32_t i = 0; i < memoryBarrierCount; i++) { - src_flags |= pMemoryBarriers[i].srcAccessMask; - dst_flags |= pMemoryBarriers[i].dstAccessMask; - } - - for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) { - src_flags |= pBufferMemoryBarriers[i].srcAccessMask; - dst_flags |= pBufferMemoryBarriers[i].dstAccessMask; - } - - for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { - src_flags |= pImageMemoryBarriers[i].srcAccessMask; - dst_flags |= pImageMemoryBarriers[i].dstAccessMask; - } - - /* Mask out the Source access flags we care about */ - const uint32_t src_mask = - VK_ACCESS_SHADER_WRITE_BIT | - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | - VK_ACCESS_TRANSFER_WRITE_BIT; - - src_flags = src_flags & src_mask; - - /* Mask out the destination access flags we care about */ - const uint32_t dst_mask = - VK_ACCESS_INDIRECT_COMMAND_READ_BIT | - VK_ACCESS_INDEX_READ_BIT | - VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | - VK_ACCESS_UNIFORM_READ_BIT | - VK_ACCESS_SHADER_READ_BIT | - VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | - VK_ACCESS_TRANSFER_READ_BIT; - - dst_flags = dst_flags & dst_mask; - - /* The src flags represent how things were used previously. This is - * what we use for doing flushes. 
- */ - struct GENX(PIPE_CONTROL) flush_cmd = { - GENX(PIPE_CONTROL_header), - .PostSyncOperation = NoWrite, - }; - - for_each_bit(b, src_flags) { - switch ((VkAccessFlagBits)(1 << b)) { - case VK_ACCESS_SHADER_WRITE_BIT: - flush_cmd.DCFlushEnable = true; - break; - case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: - flush_cmd.RenderTargetCacheFlushEnable = true; - break; - case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: - flush_cmd.DepthCacheFlushEnable = true; - break; - case VK_ACCESS_TRANSFER_WRITE_BIT: - flush_cmd.RenderTargetCacheFlushEnable = true; - flush_cmd.DepthCacheFlushEnable = true; - break; - default: - unreachable("should've masked this out by now"); - } - } - - /* If we end up doing two PIPE_CONTROLs, the first, flusing one also has to - * stall and wait for the flushing to finish, so we don't re-dirty the - * caches with in-flight rendering after the second PIPE_CONTROL - * invalidates. - */ - - if (dst_flags) - flush_cmd.CommandStreamerStallEnable = true; - - if (src_flags && dst_flags) { - dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length)); - GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &flush_cmd); - } - - /* The dst flags represent how things will be used in the future. This - * is what we use for doing cache invalidations. - */ - struct GENX(PIPE_CONTROL) invalidate_cmd = { - GENX(PIPE_CONTROL_header), - .PostSyncOperation = NoWrite, - }; - - for_each_bit(b, dst_flags) { - switch ((VkAccessFlagBits)(1 << b)) { - case VK_ACCESS_INDIRECT_COMMAND_READ_BIT: - case VK_ACCESS_INDEX_READ_BIT: - case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT: - invalidate_cmd.VFCacheInvalidationEnable = true; - break; - case VK_ACCESS_UNIFORM_READ_BIT: - invalidate_cmd.ConstantCacheInvalidationEnable = true; - /* fallthrough */ - case VK_ACCESS_SHADER_READ_BIT: - invalidate_cmd.TextureCacheInvalidationEnable = true; - break; - case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT: - invalidate_cmd.TextureCacheInvalidationEnable = true; - break; - case VK_ACCESS_TRANSFER_READ_BIT: - invalidate_cmd.TextureCacheInvalidationEnable = true; - break; - default: - unreachable("should've masked this out by now"); - } - } - - if (dst_flags) { - dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length)); - GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &invalidate_cmd); - } -} - -static void -emit_base_vertex_instance_bo(struct anv_cmd_buffer *cmd_buffer, - struct anv_bo *bo, uint32_t offset) -{ - uint32_t *p = anv_batch_emitn(&cmd_buffer->batch, 5, - GENX(3DSTATE_VERTEX_BUFFERS)); - - GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, p + 1, - &(struct GENX(VERTEX_BUFFER_STATE)) { - .VertexBufferIndex = 32, /* Reserved for this */ - .AddressModifyEnable = true, - .BufferPitch = 0, -#if (ANV_GEN >= 8) - .MemoryObjectControlState = GENX(MOCS), - .BufferStartingAddress = { bo, offset }, - .BufferSize = 8 -#else - .VertexBufferMemoryObjectControlState = GENX(MOCS), - .BufferStartingAddress = { bo, offset }, - .EndAddress = { bo, offset + 8 }, -#endif - }); -} - -static void -emit_base_vertex_instance(struct anv_cmd_buffer *cmd_buffer, - uint32_t base_vertex, uint32_t base_instance) -{ - struct anv_state id_state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4); - - ((uint32_t *)id_state.map)[0] = base_vertex; - ((uint32_t *)id_state.map)[1] = base_instance; - - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(id_state); - - emit_base_vertex_instance_bo(cmd_buffer, - &cmd_buffer->device->dynamic_state_block_pool.bo, id_state.offset); -} - -void genX(CmdDraw)( - 
VkCommandBuffer commandBuffer, - uint32_t vertexCount, - uint32_t instanceCount, - uint32_t firstVertex, - uint32_t firstInstance) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - - genX(cmd_buffer_flush_state)(cmd_buffer); - - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) - emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance); - - anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), - .VertexAccessType = SEQUENTIAL, - .PrimitiveTopologyType = pipeline->topology, - .VertexCountPerInstance = vertexCount, - .StartVertexLocation = firstVertex, - .InstanceCount = instanceCount, - .StartInstanceLocation = firstInstance, - .BaseVertexLocation = 0); -} - -void genX(CmdDrawIndexed)( - VkCommandBuffer commandBuffer, - uint32_t indexCount, - uint32_t instanceCount, - uint32_t firstIndex, - int32_t vertexOffset, - uint32_t firstInstance) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - - genX(cmd_buffer_flush_state)(cmd_buffer); - - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) - emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance); - - anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), - .VertexAccessType = RANDOM, - .PrimitiveTopologyType = pipeline->topology, - .VertexCountPerInstance = indexCount, - .StartVertexLocation = firstIndex, - .InstanceCount = instanceCount, - .StartInstanceLocation = firstInstance, - .BaseVertexLocation = vertexOffset); -} - -/* Auto-Draw / Indirect Registers */ -#define GEN7_3DPRIM_END_OFFSET 0x2420 -#define GEN7_3DPRIM_START_VERTEX 0x2430 -#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 -#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 -#define GEN7_3DPRIM_START_INSTANCE 0x243C -#define GEN7_3DPRIM_BASE_VERTEX 0x2440 - -static void -emit_lrm(struct anv_batch *batch, - uint32_t reg, struct anv_bo *bo, uint32_t offset) -{ - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), - .RegisterAddress = reg, - .MemoryAddress = { bo, offset }); -} - -static void -emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) -{ - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), - .RegisterOffset = reg, - .DataDWord = imm); -} - -void genX(CmdDrawIndirect)( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - uint32_t drawCount, - uint32_t stride) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - genX(cmd_buffer_flush_state)(cmd_buffer); - - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) - emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 8); - - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); - emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); - - anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), - .IndirectParameterEnable = true, - .VertexAccessType = SEQUENTIAL, - 
.PrimitiveTopologyType = pipeline->topology); -} - -void genX(CmdDrawIndexedIndirect)( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - uint32_t drawCount, - uint32_t stride) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - genX(cmd_buffer_flush_state)(cmd_buffer); - - /* TODO: We need to stomp base vertex to 0 somehow */ - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) - emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 12); - - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); - emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); - - anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), - .IndirectParameterEnable = true, - .VertexAccessType = RANDOM, - .PrimitiveTopologyType = pipeline->topology); -} - - -void genX(CmdDispatch)( - VkCommandBuffer commandBuffer, - uint32_t x, - uint32_t y, - uint32_t z) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - - if (prog_data->uses_num_work_groups) { - struct anv_state state = - anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4); - uint32_t *sizes = state.map; - sizes[0] = x; - sizes[1] = y; - sizes[2] = z; - if (!cmd_buffer->device->info.has_llc) - anv_state_clflush(state); - cmd_buffer->state.num_workgroups_offset = state.offset; - cmd_buffer->state.num_workgroups_bo = - &cmd_buffer->device->dynamic_state_block_pool.bo; - } - - genX(cmd_buffer_flush_compute_state)(cmd_buffer); - - anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), - .SIMDSize = prog_data->simd_size / 16, - .ThreadDepthCounterMaximum = 0, - .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, - .ThreadGroupIDXDimension = x, - .ThreadGroupIDYDimension = y, - .ThreadGroupIDZDimension = z, - .RightExecutionMask = pipeline->cs_right_mask, - .BottomExecutionMask = 0xffffffff); - - anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); -} - -#define GPGPU_DISPATCHDIMX 0x2500 -#define GPGPU_DISPATCHDIMY 0x2504 -#define GPGPU_DISPATCHDIMZ 0x2508 - -void genX(CmdDispatchIndirect)( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - struct anv_bo *bo = buffer->bo; - uint32_t bo_offset = buffer->offset + offset; - - if (prog_data->uses_num_work_groups) { - cmd_buffer->state.num_workgroups_offset = bo_offset; - cmd_buffer->state.num_workgroups_bo = bo; - } - - genX(cmd_buffer_flush_compute_state)(cmd_buffer); - - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); - emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); - - 
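/* For reference, the three MI_LOAD_REGISTER_MEM commands above read the
 * dispatch dimensions as consecutive 32-bit values, which is exactly the
 * core Vulkan VkDispatchIndirectCommand layout (sketch of the assumed
 * indirect-buffer contents):
 *
 *   typedef struct VkDispatchIndirectCommand {
 *       uint32_t x;   // -> GPGPU_DISPATCHDIMX, read from bo_offset + 0
 *       uint32_t y;   // -> GPGPU_DISPATCHDIMY, read from bo_offset + 4
 *       uint32_t z;   // -> GPGPU_DISPATCHDIMZ, read from bo_offset + 8
 *   } VkDispatchIndirectCommand;
 */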
anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), - .IndirectParameterEnable = true, - .SIMDSize = prog_data->simd_size / 16, - .ThreadDepthCounterMaximum = 0, - .ThreadHeightCounterMaximum = 0, - .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1, - .RightExecutionMask = pipeline->cs_right_mask, - .BottomExecutionMask = 0xffffffff); - - anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH)); -} - -void -genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer) -{ - if (cmd_buffer->state.current_pipeline != _3D) { - anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), -#if ANV_GEN >= 9 - .MaskBits = 3, -#endif - .PipelineSelection = _3D); - cmd_buffer->state.current_pipeline = _3D; - } -} - -static void -cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_device *device = cmd_buffer->device; - const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct anv_image_view *iview = - anv_cmd_buffer_get_depth_stencil_view(cmd_buffer); - const struct anv_image *image = iview ? iview->image : NULL; - const struct anv_format *anv_format = - iview ? anv_format_for_vk_format(iview->vk_format) : NULL; - const bool has_depth = iview && anv_format->has_depth; - const bool has_stencil = iview && anv_format->has_stencil; - - /* FIXME: Implement the PMA stall W/A */ - /* FIXME: Width and Height are wrong */ - - /* Emit 3DSTATE_DEPTH_BUFFER */ - if (has_depth) { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), - .SurfaceType = SURFTYPE_2D, - .DepthWriteEnable = true, - .StencilWriteEnable = has_stencil, - .HierarchicalDepthBufferEnable = false, - .SurfaceFormat = isl_surf_get_depth_format(&device->isl_dev, - &image->depth_surface.isl), - .SurfacePitch = image->depth_surface.isl.row_pitch - 1, - .SurfaceBaseAddress = { - .bo = image->bo, - .offset = image->depth_surface.offset, - }, - .Height = fb->height - 1, - .Width = fb->width - 1, - .LOD = 0, - .Depth = 1 - 1, - .MinimumArrayElement = 0, - .DepthBufferObjectControlState = GENX(MOCS), -#if ANV_GEN >= 8 - .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->depth_surface.isl) >> 2, -#endif - .RenderTargetViewExtent = 1 - 1); - } else { - /* Even when no depth buffer is present, the hardware requires that - * 3DSTATE_DEPTH_BUFFER be programmed correctly. The Broadwell PRM says: - * - * If a null depth buffer is bound, the driver must instead bind depth as: - * 3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D - * 3DSTATE_DEPTH.Width = 1 - * 3DSTATE_DEPTH.Height = 1 - * 3DSTATE_DEPTH.SuraceFormat = D16_UNORM - * 3DSTATE_DEPTH.SurfaceBaseAddress = 0 - * 3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0 - * 3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0 - * 3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0 - * - * The PRM is wrong, though. The width and height must be programmed to - * actual framebuffer's width and height, even when neither depth buffer - * nor stencil buffer is present. - */ - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), - .SurfaceType = SURFTYPE_2D, - .SurfaceFormat = D16_UNORM, - .Width = fb->width - 1, - .Height = fb->height - 1, - .StencilWriteEnable = has_stencil); - } - - /* Emit 3DSTATE_STENCIL_BUFFER */ - if (has_stencil) { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER), -#if ANV_GEN >= 8 || ANV_IS_HASWELL - .StencilBufferEnable = true, -#endif - .StencilBufferObjectControlState = GENX(MOCS), - - /* Stencil buffers have strange pitch. 
The PRM says: - * - * The pitch must be set to 2x the value computed based on width, - * as the stencil buffer is stored with two rows interleaved. - */ - .SurfacePitch = 2 * image->stencil_surface.isl.row_pitch - 1, - -#if ANV_GEN >= 8 - .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->stencil_surface.isl) >> 2, -#endif - .SurfaceBaseAddress = { - .bo = image->bo, - .offset = image->offset + image->stencil_surface.offset, - }); - } else { - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER)); - } - - /* Disable hierarchial depth buffers. */ - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HIER_DEPTH_BUFFER)); - - /* Clear the clear params. */ - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS)); -} - -/** - * @see anv_cmd_buffer_set_subpass() - */ -void -genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer, - struct anv_subpass *subpass) -{ - cmd_buffer->state.subpass = subpass; - - cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT; - - cmd_buffer_emit_depth_stencil(cmd_buffer); -} - -void genX(CmdBeginRenderPass)( - VkCommandBuffer commandBuffer, - const VkRenderPassBeginInfo* pRenderPassBegin, - VkSubpassContents contents) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass); - ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); - - cmd_buffer->state.framebuffer = framebuffer; - cmd_buffer->state.pass = pass; - anv_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin); - - genX(flush_pipeline_select_3d)(cmd_buffer); - - const VkRect2D *render_area = &pRenderPassBegin->renderArea; - - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DRAWING_RECTANGLE), - .ClippedDrawingRectangleYMin = render_area->offset.y, - .ClippedDrawingRectangleXMin = render_area->offset.x, - .ClippedDrawingRectangleYMax = - render_area->offset.y + render_area->extent.height - 1, - .ClippedDrawingRectangleXMax = - render_area->offset.x + render_area->extent.width - 1, - .DrawingRectangleOriginY = 0, - .DrawingRectangleOriginX = 0); - - genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses); - anv_cmd_buffer_clear_subpass(cmd_buffer); -} - -void genX(CmdNextSubpass)( - VkCommandBuffer commandBuffer, - VkSubpassContents contents) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); - - anv_cmd_buffer_resolve_subpass(cmd_buffer); - genX(cmd_buffer_set_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1); - anv_cmd_buffer_clear_subpass(cmd_buffer); -} - -void genX(CmdEndRenderPass)( - VkCommandBuffer commandBuffer) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - anv_cmd_buffer_resolve_subpass(cmd_buffer); -} diff --git a/src/vulkan/genX_pipeline.c b/src/vulkan/genX_pipeline.c deleted file mode 100644 index 4c2e0bc..0000000 --- a/src/vulkan/genX_pipeline.c +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice 
(including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "anv_private.h" - -#if (ANV_GEN == 9) -# include "genxml/gen9_pack.h" -#elif (ANV_GEN == 8) -# include "genxml/gen8_pack.h" -#elif (ANV_IS_HASWELL) -# include "genxml/gen75_pack.h" -#elif (ANV_GEN == 7) -# include "genxml/gen7_pack.h" -#endif - -VkResult -genX(compute_pipeline_create)( - VkDevice _device, - struct anv_pipeline_cache * cache, - const VkComputePipelineCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkPipeline* pPipeline) -{ - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_pipeline *pipeline; - VkResult result; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO); - - pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pipeline == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - - pipeline->device = device; - pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); - - pipeline->blend_state.map = NULL; - - result = anv_reloc_list_init(&pipeline->batch_relocs, - pAllocator ? pAllocator : &device->alloc); - if (result != VK_SUCCESS) { - anv_free2(&device->alloc, pAllocator, pipeline); - return result; - } - pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; - pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); - pipeline->batch.relocs = &pipeline->batch_relocs; - - /* When we free the pipeline, we detect stages based on the NULL status - * of various prog_data pointers. Make them NULL by default. - */ - memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); - memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); - - pipeline->vs_simd8 = NO_KERNEL; - pipeline->vs_vec4 = NO_KERNEL; - pipeline->gs_kernel = NO_KERNEL; - - pipeline->active_stages = 0; - pipeline->total_scratch = 0; - - assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT); - ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module); - anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module, - pCreateInfo->stage.pName, - pCreateInfo->stage.pSpecializationInfo); - - pipeline->use_repclear = false; - - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; - - anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), - .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_COMPUTE], - .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048), -#if ANV_GEN > 7 - .ScratchSpaceBasePointerHigh = 0, - .StackSize = 0, -#else - .GPGPUMode = true, -#endif - .MaximumNumberofThreads = device->info.max_cs_threads - 1, - .NumberofURBEntries = ANV_GEN <= 7 ? 0 : 2, - .ResetGatewayTimer = true, -#if ANV_GEN <= 8 - .BypassGatewayControl = true, -#endif - .URBEntryAllocationSize = ANV_GEN <= 7 ? 
0 : 2, - .CURBEAllocationSize = 0); - - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; - uint32_t group_size = prog_data->local_size[0] * - prog_data->local_size[1] * prog_data->local_size[2]; - pipeline->cs_thread_width_max = DIV_ROUND_UP(group_size, prog_data->simd_size); - uint32_t remainder = group_size & (prog_data->simd_size - 1); - - if (remainder > 0) - pipeline->cs_right_mask = ~0u >> (32 - remainder); - else - pipeline->cs_right_mask = ~0u >> (32 - prog_data->simd_size); - - - *pPipeline = anv_pipeline_to_handle(pipeline); - - return VK_SUCCESS; -} diff --git a/src/vulkan/genX_pipeline_util.h b/src/vulkan/genX_pipeline_util.h deleted file mode 100644 index 696e2be..0000000 --- a/src/vulkan/genX_pipeline_util.h +++ /dev/null @@ -1,327 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -static uint32_t -vertex_element_comp_control(enum isl_format format, unsigned comp) -{ - uint8_t bits; - switch (comp) { - case 0: bits = isl_format_layouts[format].channels.r.bits; break; - case 1: bits = isl_format_layouts[format].channels.g.bits; break; - case 2: bits = isl_format_layouts[format].channels.b.bits; break; - case 3: bits = isl_format_layouts[format].channels.a.bits; break; - default: unreachable("Invalid component"); - } - - if (bits) { - return VFCOMP_STORE_SRC; - } else if (comp < 3) { - return VFCOMP_STORE_0; - } else if (isl_format_layouts[format].channels.r.type == ISL_UINT || - isl_format_layouts[format].channels.r.type == ISL_SINT) { - assert(comp == 3); - return VFCOMP_STORE_1_INT; - } else { - assert(comp == 3); - return VFCOMP_STORE_1_FP; - } -} - -static void -emit_vertex_input(struct anv_pipeline *pipeline, - const VkPipelineVertexInputStateCreateInfo *info, - const struct anv_graphics_pipeline_create_info *extra) -{ - uint32_t elements; - if (extra && extra->disable_vs) { - /* If the VS is disabled, just assume the user knows what they're - * doing and apply the layout blindly. This can only come from - * meta, so this *should* be safe. 
- */ - elements = 0; - for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) - elements |= (1 << info->pVertexAttributeDescriptions[i].location); - } else { - /* Pull inputs_read out of the VS prog data */ - uint64_t inputs_read = pipeline->vs_prog_data.inputs_read; - assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0); - elements = inputs_read >> VERT_ATTRIB_GENERIC0; - } - -#if ANV_GEN >= 8 - /* On BDW+, we only need to allocate space for base ids. Setting up - * the actual vertex and instance id is a separate packet. - */ - const bool needs_svgs_elem = pipeline->vs_prog_data.uses_basevertex || - pipeline->vs_prog_data.uses_baseinstance; -#else - /* On Haswell and prior, vertex and instance id are created by using the - * ComponentControl fields, so we need an element for any of them. - */ - const bool needs_svgs_elem = pipeline->vs_prog_data.uses_vertexid || - pipeline->vs_prog_data.uses_instanceid || - pipeline->vs_prog_data.uses_basevertex || - pipeline->vs_prog_data.uses_baseinstance; -#endif - - uint32_t elem_count = __builtin_popcount(elements) + needs_svgs_elem; - if (elem_count == 0) - return; - - uint32_t *p; - - const uint32_t num_dwords = 1 + elem_count * 2; - p = anv_batch_emitn(&pipeline->batch, num_dwords, - GENX(3DSTATE_VERTEX_ELEMENTS)); - memset(p + 1, 0, (num_dwords - 1) * 4); - - for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) { - const VkVertexInputAttributeDescription *desc = - &info->pVertexAttributeDescriptions[i]; - enum isl_format format = anv_get_isl_format(desc->format, - VK_IMAGE_ASPECT_COLOR_BIT, - VK_IMAGE_TILING_LINEAR, - NULL); - - assert(desc->binding < 32); - - if ((elements & (1 << desc->location)) == 0) - continue; /* Binding unused */ - - uint32_t slot = __builtin_popcount(elements & ((1 << desc->location) - 1)); - - struct GENX(VERTEX_ELEMENT_STATE) element = { - .VertexBufferIndex = desc->binding, - .Valid = true, - .SourceElementFormat = format, - .EdgeFlagEnable = false, - .SourceElementOffset = desc->offset, - .Component0Control = vertex_element_comp_control(format, 0), - .Component1Control = vertex_element_comp_control(format, 1), - .Component2Control = vertex_element_comp_control(format, 2), - .Component3Control = vertex_element_comp_control(format, 3), - }; - GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element); - -#if ANV_GEN >= 8 - /* On Broadwell and later, we have a separate VF_INSTANCING packet - * that controls instancing. On Haswell and prior, that's part of - * VERTEX_BUFFER_STATE which we emit later. - */ - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING), - .InstancingEnable = pipeline->instancing_enable[desc->binding], - .VertexElementIndex = slot, - /* Vulkan so far doesn't have an instance divisor, so - * this is always 1 (ignored if not instancing). */ - .InstanceDataStepRate = 1); -#endif - } - - const uint32_t id_slot = __builtin_popcount(elements); - if (needs_svgs_elem) { - /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum: - * "Within a VERTEX_ELEMENT_STATE structure, if a Component - * Control field is set to something other than VFCOMP_STORE_SRC, - * no higher-numbered Component Control fields may be set to - * VFCOMP_STORE_SRC" - * - * This means, that if we have BaseInstance, we need BaseVertex as - * well. Just do all or nothing. - */ - uint32_t base_ctrl = (pipeline->vs_prog_data.uses_basevertex || - pipeline->vs_prog_data.uses_baseinstance) ? 
- VFCOMP_STORE_SRC : VFCOMP_STORE_0; - - struct GENX(VERTEX_ELEMENT_STATE) element = { - .VertexBufferIndex = 32, /* Reserved for this */ - .Valid = true, - .SourceElementFormat = ISL_FORMAT_R32G32_UINT, - .Component0Control = base_ctrl, - .Component1Control = base_ctrl, -#if ANV_GEN >= 8 - .Component2Control = VFCOMP_STORE_0, - .Component3Control = VFCOMP_STORE_0, -#else - .Component2Control = VFCOMP_STORE_VID, - .Component3Control = VFCOMP_STORE_IID, -#endif - }; - GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element); - } - -#if ANV_GEN >= 8 - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS), - .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, - .VertexIDComponentNumber = 2, - .VertexIDElementOffset = id_slot, - .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, - .InstanceIDComponentNumber = 3, - .InstanceIDElementOffset = id_slot); -#endif -} - -static inline void -emit_urb_setup(struct anv_pipeline *pipeline) -{ -#if ANV_GEN == 7 - struct anv_device *device = pipeline->device; - - /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1: - * - * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall - * needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS, - * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS, - * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL - * needs to be sent before any combination of VS associated 3DSTATE." - */ - anv_batch_emit(&pipeline->batch, GEN7_PIPE_CONTROL, - .DepthStallEnable = true, - .PostSyncOperation = WriteImmediateData, - .Address = { &device->workaround_bo, 0 }); -#endif - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), - .ConstantBufferOffset = 0, - .ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_GS), - .ConstantBufferOffset = 4, - .ConstantBufferSize = 4); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_PS), - .ConstantBufferOffset = 8, - .ConstantBufferSize = 4); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_VS), - .VSURBStartingAddress = pipeline->urb.vs_start, - .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, - .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_GS), - .GSURBStartingAddress = pipeline->urb.gs_start, - .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, - .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_HS), - .HSURBStartingAddress = pipeline->urb.vs_start, - .HSURBEntryAllocationSize = 0, - .HSNumberofURBEntries = 0); - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_DS), - .DSURBStartingAddress = pipeline->urb.vs_start, - .DSURBEntryAllocationSize = 0, - .DSNumberofURBEntries = 0); -} - -static inline uint32_t -scratch_space(const struct brw_stage_prog_data *prog_data) -{ - return ffs(prog_data->total_scratch / 2048); -} - -static const uint32_t vk_to_gen_cullmode[] = { - [VK_CULL_MODE_NONE] = CULLMODE_NONE, - [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT, - [VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK, - [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH -}; - -static const uint32_t vk_to_gen_fillmode[] = { - [VK_POLYGON_MODE_FILL] = FILL_MODE_SOLID, - [VK_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME, - [VK_POLYGON_MODE_POINT] = FILL_MODE_POINT, -}; - -static const uint32_t vk_to_gen_front_face[] = { - [VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1, - [VK_FRONT_FACE_CLOCKWISE] = 0 -}; - -static const uint32_t vk_to_gen_logic_op[] = { - 
[VK_LOGIC_OP_COPY] = LOGICOP_COPY, - [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR, - [VK_LOGIC_OP_AND] = LOGICOP_AND, - [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, - [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, - [VK_LOGIC_OP_NO_OP] = LOGICOP_NOOP, - [VK_LOGIC_OP_XOR] = LOGICOP_XOR, - [VK_LOGIC_OP_OR] = LOGICOP_OR, - [VK_LOGIC_OP_NOR] = LOGICOP_NOR, - [VK_LOGIC_OP_EQUIVALENT] = LOGICOP_EQUIV, - [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, - [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, - [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, - [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED, - [VK_LOGIC_OP_NAND] = LOGICOP_NAND, - [VK_LOGIC_OP_SET] = LOGICOP_SET, -}; - -static const uint32_t vk_to_gen_blend[] = { - [VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO, - [VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE, - [VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, - [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, - [VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR, - [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR, - [VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, - [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, - [VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA, - [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, - [VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, - [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= BLENDFACTOR_INV_CONST_COLOR, - [VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, - [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= BLENDFACTOR_INV_CONST_ALPHA, - [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, - [VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, - [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, - [VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, - [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, -}; - -static const uint32_t vk_to_gen_blend_op[] = { - [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD, - [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT, - [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT, - [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN, - [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, -}; - -static const uint32_t vk_to_gen_compare_op[] = { - [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, - [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, - [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, - [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, - [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, - [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, - [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, - [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, -}; - -static const uint32_t vk_to_gen_stencil_op[] = { - [VK_STENCIL_OP_KEEP] = STENCILOP_KEEP, - [VK_STENCIL_OP_ZERO] = STENCILOP_ZERO, - [VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE, - [VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT, - [VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT, - [VK_STENCIL_OP_INVERT] = STENCILOP_INVERT, - [VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR, - [VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR, -}; diff --git a/src/vulkan/genX_state_util.h b/src/vulkan/genX_state_util.h deleted file mode 100644 index 67f798a..0000000 --- a/src/vulkan/genX_state_util.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including 
without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -static const uint8_t -anv_surftype(const struct anv_image *image, VkImageViewType view_type, - bool storage) -{ - switch (view_type) { - default: - unreachable("bad VkImageViewType"); - case VK_IMAGE_VIEW_TYPE_1D: - case VK_IMAGE_VIEW_TYPE_1D_ARRAY: - assert(image->type == VK_IMAGE_TYPE_1D); - return SURFTYPE_1D; - case VK_IMAGE_VIEW_TYPE_CUBE: - case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: - assert(image->type == VK_IMAGE_TYPE_2D); - return storage ? SURFTYPE_2D : SURFTYPE_CUBE; - case VK_IMAGE_VIEW_TYPE_2D: - case VK_IMAGE_VIEW_TYPE_2D_ARRAY: - assert(image->type == VK_IMAGE_TYPE_2D); - return SURFTYPE_2D; - case VK_IMAGE_VIEW_TYPE_3D: - assert(image->type == VK_IMAGE_TYPE_3D); - return SURFTYPE_3D; - } -} - -static enum isl_format -anv_surface_format(const struct anv_device *device, enum isl_format format, - bool storage) -{ - if (storage) { - return isl_lower_storage_image_format(&device->isl_dev, format); - } else { - return format; - } -} - -#if ANV_GEN > 7 || ANV_IS_HASWELL -static const uint32_t vk_to_gen_swizzle[] = { - [VK_COMPONENT_SWIZZLE_ZERO] = SCS_ZERO, - [VK_COMPONENT_SWIZZLE_ONE] = SCS_ONE, - [VK_COMPONENT_SWIZZLE_R] = SCS_RED, - [VK_COMPONENT_SWIZZLE_G] = SCS_GREEN, - [VK_COMPONENT_SWIZZLE_B] = SCS_BLUE, - [VK_COMPONENT_SWIZZLE_A] = SCS_ALPHA -}; -#endif - -static inline uint32_t -vk_to_gen_tex_filter(VkFilter filter, bool anisotropyEnable) -{ - switch (filter) { - default: - assert(!"Invalid filter"); - case VK_FILTER_NEAREST: - return MAPFILTER_NEAREST; - case VK_FILTER_LINEAR: - return anisotropyEnable ? 
MAPFILTER_ANISOTROPIC : MAPFILTER_LINEAR; - } -} - -static inline uint32_t -vk_to_gen_max_anisotropy(float ratio) -{ - return (anv_clamp_f(ratio, 2, 16) - 2) / 2; -} - -static const uint32_t vk_to_gen_mipmap_mode[] = { - [VK_SAMPLER_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, - [VK_SAMPLER_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR -}; - -static const uint32_t vk_to_gen_tex_address[] = { - [VK_SAMPLER_ADDRESS_MODE_REPEAT] = TCM_WRAP, - [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = TCM_MIRROR, - [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = TCM_CLAMP, - [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, - [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, -}; - -static const uint32_t vk_to_gen_compare_op[] = { - [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, - [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, - [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, - [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL, - [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, - [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, - [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL, - [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, -}; diff --git a/src/vulkan/intel_icd.json.in b/src/vulkan/intel_icd.json.in deleted file mode 100644 index d9b363a..0000000 --- a/src/vulkan/intel_icd.json.in +++ /dev/null @@ -1,7 +0,0 @@ -{ - "file_format_version": "1.0.0", - "ICD": { - "library_path": "@install_libdir@/libvulkan_intel.so", - "abi_versions": "1.0.3" - } -} diff --git a/src/vulkan/tests/.gitignore b/src/vulkan/tests/.gitignore deleted file mode 100644 index 5d05405..0000000 --- a/src/vulkan/tests/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -block_pool -block_pool_no_free -state_pool -state_pool_free_list_only -state_pool_no_free diff --git a/src/vulkan/tests/Makefile.am b/src/vulkan/tests/Makefile.am deleted file mode 100644 index 883013d..0000000 --- a/src/vulkan/tests/Makefile.am +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright © 2009 Intel Corporation -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# on the rights to use, copy, modify, merge, publish, distribute, sub -# license, and/or sell copies of the Software, and to permit persons to whom -# the Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL -# ADAM JACKSON BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- -AM_CPPFLAGS = \ - $(INTEL_CFLAGS) \ - $(VALGRIND_CFLAGS) \ - $(DEFINES) \ - -I$(top_srcdir)/include \ - -I$(top_srcdir)/src \ - -I$(top_srcdir)/src/mapi \ - -I$(top_srcdir)/src/mesa \ - -I$(top_srcdir)/src/mesa/drivers/dri/common \ - -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ - -I$(top_srcdir)/src/gallium/auxiliary \ - -I$(top_srcdir)/src/gallium/include \ - -I$(top_srcdir)/src/isl/ \ - -I$(top_srcdir)/src/vulkan - -LDADD = \ - $(top_builddir)/src/vulkan/libvulkan-test.la \ - $(PTHREAD_LIBS) -lm -lstdc++ - -check_PROGRAMS = \ - block_pool_no_free \ - state_pool_no_free \ - state_pool_free_list_only \ - state_pool - -TESTS = $(check_PROGRAMS) diff --git a/src/vulkan/tests/block_pool_no_free.c b/src/vulkan/tests/block_pool_no_free.c deleted file mode 100644 index 86d1a76..0000000 --- a/src/vulkan/tests/block_pool_no_free.c +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include - -#include "anv_private.h" - -#define NUM_THREADS 16 -#define BLOCKS_PER_THREAD 1024 -#define NUM_RUNS 64 - -struct job { - pthread_t thread; - unsigned id; - struct anv_block_pool *pool; - uint32_t blocks[BLOCKS_PER_THREAD]; - uint32_t back_blocks[BLOCKS_PER_THREAD]; -} jobs[NUM_THREADS]; - - -static void *alloc_blocks(void *_job) -{ - struct job *job = _job; - int32_t block, *data; - - for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) { - block = anv_block_pool_alloc(job->pool); - data = job->pool->map + block; - *data = block; - assert(block >= 0); - job->blocks[i] = block; - - block = anv_block_pool_alloc_back(job->pool); - data = job->pool->map + block; - *data = block; - assert(block < 0); - job->back_blocks[i] = -block; - } - - for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) { - block = job->blocks[i]; - data = job->pool->map + block; - assert(*data == block); - - block = -job->back_blocks[i]; - data = job->pool->map + block; - assert(*data == block); - } - - return NULL; -} - -static void validate_monotonic(uint32_t **blocks) -{ - /* A list of indices, one per thread */ - unsigned next[NUM_THREADS]; - memset(next, 0, sizeof(next)); - - int highest = -1; - while (true) { - /* First, we find which thread has the highest next element */ - int thread_max = -1; - int max_thread_idx = -1; - for (unsigned i = 0; i < NUM_THREADS; i++) { - if (next[i] >= BLOCKS_PER_THREAD) - continue; - - if (thread_max < blocks[i][next[i]]) { - thread_max = blocks[i][next[i]]; - max_thread_idx = i; - } - } - - /* The only way this can happen is if all of the next[] values are at - * BLOCKS_PER_THREAD, in which case, we're done. - */ - if (thread_max == -1) - break; - - /* That next element had better be higher than the previous highest */ - assert(blocks[max_thread_idx][next[max_thread_idx]] > highest); - - highest = blocks[max_thread_idx][next[max_thread_idx]]; - next[max_thread_idx]++; - } -} - -static void run_test() -{ - struct anv_device device; - struct anv_block_pool pool; - - pthread_mutex_init(&device.mutex, NULL); - anv_block_pool_init(&pool, &device, 16); - - for (unsigned i = 0; i < NUM_THREADS; i++) { - jobs[i].pool = &pool; - jobs[i].id = i; - pthread_create(&jobs[i].thread, NULL, alloc_blocks, &jobs[i]); - } - - for (unsigned i = 0; i < NUM_THREADS; i++) - pthread_join(jobs[i].thread, NULL); - - /* Validate that the block allocations were monotonic */ - uint32_t *block_ptrs[NUM_THREADS]; - for (unsigned i = 0; i < NUM_THREADS; i++) - block_ptrs[i] = jobs[i].blocks; - validate_monotonic(block_ptrs); - - /* Validate that the back block allocations were monotonic */ - for (unsigned i = 0; i < NUM_THREADS; i++) - block_ptrs[i] = jobs[i].back_blocks; - validate_monotonic(block_ptrs); - - anv_block_pool_finish(&pool); - pthread_mutex_destroy(&device.mutex); -} - -int main(int argc, char **argv) -{ - for (unsigned i = 0; i < NUM_RUNS; i++) - run_test(); -} diff --git a/src/vulkan/tests/state_pool.c b/src/vulkan/tests/state_pool.c deleted file mode 100644 index 878ec19..0000000 --- a/src/vulkan/tests/state_pool.c +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is 
furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include - -#include "anv_private.h" - -#define NUM_THREADS 8 -#define STATES_PER_THREAD_LOG2 10 -#define STATES_PER_THREAD (1 << STATES_PER_THREAD_LOG2) -#define NUM_RUNS 64 - -#include "state_pool_test_helper.h" - -int main(int argc, char **argv) -{ - struct anv_device device; - struct anv_block_pool block_pool; - struct anv_state_pool state_pool; - - pthread_mutex_init(&device.mutex, NULL); - - for (unsigned i = 0; i < NUM_RUNS; i++) { - anv_block_pool_init(&block_pool, &device, 256); - anv_state_pool_init(&state_pool, &block_pool); - - /* Grab one so a zero offset is impossible */ - anv_state_pool_alloc(&state_pool, 16, 16); - - run_state_pool_test(&state_pool); - - anv_state_pool_finish(&state_pool); - anv_block_pool_finish(&block_pool); - } - - pthread_mutex_destroy(&device.mutex); -} diff --git a/src/vulkan/tests/state_pool_free_list_only.c b/src/vulkan/tests/state_pool_free_list_only.c deleted file mode 100644 index 2f4eb47..0000000 --- a/src/vulkan/tests/state_pool_free_list_only.c +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include - -#include "anv_private.h" - -#define NUM_THREADS 8 -#define STATES_PER_THREAD_LOG2 12 -#define STATES_PER_THREAD (1 << STATES_PER_THREAD_LOG2) - -#include "state_pool_test_helper.h" - -int main(int argc, char **argv) -{ - struct anv_device device; - struct anv_block_pool block_pool; - struct anv_state_pool state_pool; - - pthread_mutex_init(&device.mutex, NULL); - anv_block_pool_init(&block_pool, &device, 4096); - anv_state_pool_init(&state_pool, &block_pool); - - /* Grab one so a zero offset is impossible */ - anv_state_pool_alloc(&state_pool, 16, 16); - - /* Grab and return enough states that the state pool test below won't - * actually ever resize anything. - */ - { - struct anv_state states[NUM_THREADS * STATES_PER_THREAD]; - for (unsigned i = 0; i < NUM_THREADS * STATES_PER_THREAD; i++) { - states[i] = anv_state_pool_alloc(&state_pool, 16, 16); - assert(states[i].offset != 0); - } - - for (unsigned i = 0; i < NUM_THREADS * STATES_PER_THREAD; i++) - anv_state_pool_free(&state_pool, states[i]); - } - - run_state_pool_test(&state_pool); - - anv_state_pool_finish(&state_pool); - anv_block_pool_finish(&block_pool); - pthread_mutex_destroy(&device.mutex); -} diff --git a/src/vulkan/tests/state_pool_no_free.c b/src/vulkan/tests/state_pool_no_free.c deleted file mode 100644 index 4b248c2..0000000 --- a/src/vulkan/tests/state_pool_no_free.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include - -#include "anv_private.h" - -#define NUM_THREADS 16 -#define STATES_PER_THREAD 1024 -#define NUM_RUNS 64 - -struct job { - pthread_t thread; - unsigned id; - struct anv_state_pool *pool; - uint32_t offsets[STATES_PER_THREAD]; -} jobs[NUM_THREADS]; - -pthread_barrier_t barrier; - -static void *alloc_states(void *_job) -{ - struct job *job = _job; - - pthread_barrier_wait(&barrier); - - for (unsigned i = 0; i < STATES_PER_THREAD; i++) { - struct anv_state state = anv_state_pool_alloc(job->pool, 16, 16); - job->offsets[i] = state.offset; - } - - return NULL; -} - -static void run_test() -{ - struct anv_device device; - struct anv_block_pool block_pool; - struct anv_state_pool state_pool; - - pthread_mutex_init(&device.mutex, NULL); - anv_block_pool_init(&block_pool, &device, 64); - anv_state_pool_init(&state_pool, &block_pool); - - pthread_barrier_init(&barrier, NULL, NUM_THREADS); - - for (unsigned i = 0; i < NUM_THREADS; i++) { - jobs[i].pool = &state_pool; - jobs[i].id = i; - pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]); - } - - for (unsigned i = 0; i < NUM_THREADS; i++) - pthread_join(jobs[i].thread, NULL); - - /* A list of indices, one per thread */ - unsigned next[NUM_THREADS]; - memset(next, 0, sizeof(next)); - - int highest = -1; - while (true) { - /* First, we find which thread has the highest next element */ - int thread_max = -1; - int max_thread_idx = -1; - for (unsigned i = 0; i < NUM_THREADS; i++) { - if (next[i] >= STATES_PER_THREAD) - continue; - - if (thread_max < jobs[i].offsets[next[i]]) { - thread_max = jobs[i].offsets[next[i]]; - max_thread_idx = i; - } - } - - /* The only way this can happen is if all of the next[] values are at - * BLOCKS_PER_THREAD, in which case, we're done. - */ - if (thread_max == -1) - break; - - /* That next element had better be higher than the previous highest */ - assert(jobs[max_thread_idx].offsets[next[max_thread_idx]] > highest); - - highest = jobs[max_thread_idx].offsets[next[max_thread_idx]]; - next[max_thread_idx]++; - } - - anv_state_pool_finish(&state_pool); - anv_block_pool_finish(&block_pool); - pthread_mutex_destroy(&device.mutex); -} - -int main(int argc, char **argv) -{ - for (unsigned i = 0; i < NUM_RUNS; i++) - run_test(); -} diff --git a/src/vulkan/tests/state_pool_test_helper.h b/src/vulkan/tests/state_pool_test_helper.h deleted file mode 100644 index 0e56431..0000000 --- a/src/vulkan/tests/state_pool_test_helper.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include <pthread.h> - -struct job { - struct anv_state_pool *pool; - unsigned id; - pthread_t thread; -} jobs[NUM_THREADS]; - -pthread_barrier_t barrier; - -static void *alloc_states(void *void_job) -{ - struct job *job = void_job; - - const unsigned chunk_size = 1 << (job->id % STATES_PER_THREAD_LOG2); - const unsigned num_chunks = STATES_PER_THREAD / chunk_size; - - struct anv_state states[chunk_size]; - - pthread_barrier_wait(&barrier); - - for (unsigned c = 0; c < num_chunks; c++) { - for (unsigned i = 0; i < chunk_size; i++) { - states[i] = anv_state_pool_alloc(job->pool, 16, 16); - memset(states[i].map, 139, 16); - assert(states[i].offset != 0); - } - - for (unsigned i = 0; i < chunk_size; i++) - anv_state_pool_free(job->pool, states[i]); - } - - return NULL; -} - -static void run_state_pool_test(struct anv_state_pool *state_pool) -{ - pthread_barrier_init(&barrier, NULL, NUM_THREADS); - - for (unsigned i = 0; i < NUM_THREADS; i++) { - jobs[i].pool = state_pool; - jobs[i].id = i; - pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]); - } - - for (unsigned i = 0; i < NUM_THREADS; i++) - pthread_join(jobs[i].thread, NULL); -} -- cgit v1.1
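
The compute dispatch path in the patch above programs GPGPU_WALKER's ThreadWidthCounterMaximum and RightExecutionMask from values derived in genX(compute_pipeline_create). The following is a minimal standalone C sketch of that arithmetic only, not part of the patch: the workgroup size and SIMD width are hypothetical example values, and DIV_ROUND_UP is assumed to be the usual Mesa round-up-division macro (its definition is not shown in the patch).

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed Mesa-style round-up integer division. */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
   const uint32_t local_size[3] = { 8, 8, 1 };  /* hypothetical local workgroup size */
   const uint32_t simd_size = 16;               /* e.g. a SIMD16 compute shader */

   /* Same derivation as in genX(compute_pipeline_create): total invocations
    * per workgroup, number of hardware threads needed to cover them, and the
    * execution mask for the (possibly partially filled) last thread. */
   uint32_t group_size = local_size[0] * local_size[1] * local_size[2];
   uint32_t thread_width_max = DIV_ROUND_UP(group_size, simd_size);
   uint32_t remainder = group_size & (simd_size - 1);
   uint32_t right_mask = remainder > 0 ? ~0u >> (32 - remainder)
                                       : ~0u >> (32 - simd_size);

   /* For 8x8x1 at SIMD16: 64 invocations, 4 threads, mask 0x0000ffff. */
   printf("threads per group: %" PRIu32 ", right mask: 0x%08" PRIx32 "\n",
          thread_width_max, right_mask);
   return 0;
}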