From b47ce6ddb8be51d72d40ea0abc3d77b667c8552f Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Mon, 14 Nov 2016 11:37:03 +0000 Subject: docs: add sha256 checksums for 13.0.1 Signed-off-by: Emil Velikov --- docs/relnotes/13.0.1.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/relnotes/13.0.1.html b/docs/relnotes/13.0.1.html index d317adb..0ce9fc1 100644 --- a/docs/relnotes/13.0.1.html +++ b/docs/relnotes/13.0.1.html @@ -31,7 +31,8 @@ because compatibility contexts are not supported.

SHA256 checksums

-TBD
+7cbb91dead05cde279ee95f86e8321c8e1c8fc9deb88f12e0f587672a10d88c5  mesa-13.0.1.tar.gz
+71962fb2bf77d33b0ad4a565b490dbbeaf4619099c6d9722f04a73187957a731  mesa-13.0.1.tar.xz
 
-- cgit v1.1 From 4685a724f5894a3426419b9b3d4f0ec129493ad1 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Wed, 23 Nov 2016 12:28:09 +0000 Subject: cherry-ignore: add reverted LLVM_LIBDIR patch The patch was reverted shortly after it was merged. Signed-off-by: Emil Velikov --- bin/.cherry-ignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bin/.cherry-ignore b/bin/.cherry-ignore index 194252b..27f5d6d 100644 --- a/bin/.cherry-ignore +++ b/bin/.cherry-ignore @@ -1,2 +1,5 @@ # Commit was picked with -x 907ace57986733add2aebfa9dd7c83c67efed70e mapi: automake: set VISIBILITY_CFLAGS for shared glapi + +# Commit was reverted shortly after it landed in master +a39ad185932eab4f25a0cb2b112c10d8700ef242 configure.ac: honour LLVM_LIBDIR when linking against LLVM -- cgit v1.1 From 9a4206379b0e36d440481ae89b98467ed53dc86b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 7 Nov 2016 09:05:59 -0800 Subject: vc4: Don't abort when a shader compile fails. It's much better to just skip the draw call entirely. Getting this information out of register allocation will also be useful for implementing threaded fragment shaders, which will need to retry non-threaded if RA fails. Cc: (cherry picked from commit 4d019bd703e7c20d56d5b858577607115b4926a3) --- src/gallium/drivers/vc4/vc4_context.h | 8 +++++++- src/gallium/drivers/vc4/vc4_draw.c | 5 ++++- src/gallium/drivers/vc4/vc4_program.c | 18 ++++++++++++++---- src/gallium/drivers/vc4/vc4_qir.h | 1 + src/gallium/drivers/vc4/vc4_qpu_emit.c | 5 ++++- src/gallium/drivers/vc4/vc4_register_allocate.c | 3 ++- 6 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index 313630a..c164eba 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -156,6 +156,12 @@ struct vc4_compiled_shader { bool disable_early_z; + /* Set if the compile failed, likely due to register allocation + * failure. 
In this case, we have no shader to run and should not try + * to do any draws. + */ + bool failed; + uint8_t num_inputs; /* Byte offsets for the start of the vertex attributes 0-7, and the @@ -449,7 +455,7 @@ void vc4_flush_jobs_reading_resource(struct vc4_context *vc4, void vc4_emit_state(struct pipe_context *pctx); void vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c); struct qpu_reg *vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c); -void vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode); +bool vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode); bool vc4_rt_format_supported(enum pipe_format f); bool vc4_rt_format_is_565(enum pipe_format f); diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 61c5842..c5afc0c 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -307,7 +307,10 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) } vc4_start_draw(vc4); - vc4_update_compiled_shaders(vc4, info->mode); + if (!vc4_update_compiled_shaders(vc4, info->mode)) { + debug_warn_once("shader compile failed, skipping draw call.\n"); + return; + } vc4_emit_state(pctx); diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 0145488..fe07d91 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2437,9 +2437,15 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage, } } - copy_uniform_state_to_shader(shader, c); - shader->bo = vc4_bo_alloc_shader(vc4->screen, c->qpu_insts, - c->qpu_inst_count * sizeof(uint64_t)); + shader->failed = c->failed; + if (c->failed) { + shader->failed = true; + } else { + copy_uniform_state_to_shader(shader, c); + shader->bo = vc4_bo_alloc_shader(vc4->screen, c->qpu_insts, + c->qpu_inst_count * + sizeof(uint64_t)); + } /* Copy the compiler UBO range state to the compiled 
shader, dropping * out arrays that were never referenced by an indirect load. @@ -2642,11 +2648,15 @@ vc4_update_compiled_vs(struct vc4_context *vc4, uint8_t prim_mode) } } -void +bool vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode) { vc4_update_compiled_fs(vc4, prim_mode); vc4_update_compiled_vs(vc4, prim_mode); + + return !(vc4->prog.cs->failed || + vc4->prog.vs->failed || + vc4->prog.fs->failed); } static uint32_t diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index 4d41c42..c76aeb2 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -523,6 +523,7 @@ struct vc4_compile { uint32_t program_id; uint32_t variant_id; + bool failed; }; /* Special nir_load_input intrinsic index for loading the current TLB diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 4d371c0..eedee55 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -565,10 +565,13 @@ vc4_generate_code_block(struct vc4_compile *c, void vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) { - struct qpu_reg *temp_registers = vc4_register_allocate(vc4, c); struct qblock *start_block = list_first_entry(&c->blocks, struct qblock, link); + struct qpu_reg *temp_registers = vc4_register_allocate(vc4, c); + if (!temp_registers) + return; + switch (c->stage) { case QSTAGE_VERT: case QSTAGE_COORD: diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c index fc44764..6c99b05 100644 --- a/src/gallium/drivers/vc4/vc4_register_allocate.c +++ b/src/gallium/drivers/vc4/vc4_register_allocate.c @@ -323,7 +323,8 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) if (!ok) { fprintf(stderr, "Failed to register allocate:\n"); qir_dump(c); - abort(); + c->failed = true; + return NULL; } for (uint32_t i = 0; i < c->num_temps; i++) { -- cgit v1.1 From 
64d7d70c5b4722f53f6080b35ec516462f1e191b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 7 Nov 2016 12:25:47 -0800 Subject: vc4: Clamp the shadow comparison value. Fixes piglit glsl-fs-shadow2D-clamp-z. Cc: (cherry picked from commit 08d51487e3b8cfb14ca2ece9545b2e2ed344e3cc) --- src/gallium/drivers/vc4/vc4_program.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index fe07d91..05e2021 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -451,6 +451,15 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr) struct qreg u0 = qir_uniform_f(c, 0.0f); struct qreg u1 = qir_uniform_f(c, 1.0f); if (c->key->tex[unit].compare_mode) { + /* From the GL_ARB_shadow spec: + * + * "Let Dt (D subscript t) be the depth texture + * value, in the range [0, 1]. Let R be the + * interpolated texture coordinate clamped to the + * range [0, 1]." + */ + compare = qir_SAT(c, compare); + switch (c->key->tex[unit].compare_func) { case PIPE_FUNC_NEVER: depth_output = qir_uniform_f(c, 0.0f); -- cgit v1.1 From 1ba7f6ce38a6af237bec60a623ad716d5d35bce0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 7 Oct 2016 23:02:25 -0700 Subject: anv/gen8: Stall when needed in Cmd(Set|Reset)Event Signed-off-by: Jason Ekstrand Reviewed-by: Chad Versace Cc: "13.0" (cherry picked from commit 71397042fea36a2a14b530b75829ad13f969fd00) --- src/intel/vulkan/gen8_cmd_buffer.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 0548a5e..f1dfe7b 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -513,6 +513,25 @@ genX(cmd_buffer_emit_hz_op)(struct anv_cmd_buffer *cmd_buffer, } } +/* Set of stage bits for which are pipelined, i.e. they get queued by the + * command streamer for later execution. 
+ */ +#define ANV_PIPELINE_STAGE_PIPELINED_BITS \ + (VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | \ + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | \ + VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | \ + VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | \ + VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | \ + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | \ + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | \ + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | \ + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | \ + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | \ + VK_PIPELINE_STAGE_TRANSFER_BIT | \ + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | \ + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | \ + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT) + void genX(CmdSetEvent)( VkCommandBuffer commandBuffer, VkEvent _event, @@ -522,6 +541,11 @@ void genX(CmdSetEvent)( ANV_FROM_HANDLE(anv_event, event, _event); anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { + if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) { + pc.StallAtPixelScoreboard = true; + pc.CommandStreamerStallEnable = true; + } + pc.DestinationAddressType = DAT_PPGTT, pc.PostSyncOperation = WriteImmediateData, pc.Address = (struct anv_address) { @@ -541,6 +565,11 @@ void genX(CmdResetEvent)( ANV_FROM_HANDLE(anv_event, event, _event); anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { + if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) { + pc.StallAtPixelScoreboard = true; + pc.CommandStreamerStallEnable = true; + } + pc.DestinationAddressType = DAT_PPGTT; pc.PostSyncOperation = WriteImmediateData; pc.Address = (struct anv_address) { -- cgit v1.1 From 64c818d6a6586c8be9992018ffac5aedf390a8d7 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 2 Nov 2016 14:45:37 -0700 Subject: anv/wsi: Set the fence to signaled in AcquireNextImageKHR Signed-off-by: Jason Ekstrand Reviewed-by: Chad Versace Cc: "13.0" (cherry picked from commit 73701be667ae408772bf20cb504b70d1775d4a4b) --- src/intel/vulkan/anv_wsi.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 
3 deletions(-) diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c index 064581d..61d5db0 100644 --- a/src/intel/vulkan/anv_wsi.c +++ b/src/intel/vulkan/anv_wsi.c @@ -323,13 +323,20 @@ VkResult anv_AcquireNextImageKHR( VkSwapchainKHR _swapchain, uint64_t timeout, VkSemaphore semaphore, - VkFence fence, + VkFence _fence, uint32_t* pImageIndex) { ANV_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain); + ANV_FROM_HANDLE(anv_fence, fence, _fence); - return swapchain->acquire_next_image(swapchain, timeout, semaphore, - pImageIndex); + VkResult result = swapchain->acquire_next_image(swapchain, timeout, + semaphore, pImageIndex); + + /* Thanks to implicit sync, the image is ready immediately. */ + if (fence) + fence->ready = true; + + return result; } VkResult anv_QueuePresentKHR( -- cgit v1.1 From 8dab75a2eef2fc4d63416a42df651268c135b34a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 2 Nov 2016 09:11:11 -0700 Subject: anv: Rework fences Our previous fence implementation was very simple. Fences had two states: signaled and unsignaled. However, this didn't properly handle all of the edge-cases that we need to handle. In order to handle the case where the client calls vkGetFenceStatus on a fence that has not yet been submitted via vkQueueSubmit, we need a three-status system. In order to handle the case where the client calls vkWaitForFences on fences which have not yet been submitted, we need more complex logic and a condition variable. It's rather annoying but, so long as the client doesn't do that, we should still hit the fast path and use i915_gem_wait to do all our waiting. 
Signed-off-by: Jason Ekstrand Cc: "13.0" (cherry picked from commit 843775bab78a6b4d5cb4f02bd95d9d0e95c1c5e3) --- src/intel/vulkan/anv_device.c | 159 ++++++++++++++++++++++++++++++++++------- src/intel/vulkan/anv_private.h | 15 +++- src/intel/vulkan/anv_wsi.c | 2 +- 3 files changed, 150 insertions(+), 26 deletions(-) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index e83887c..a9aa646 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -896,6 +896,12 @@ VkResult anv_CreateDevice( pthread_mutex_init(&device->mutex, NULL); + pthread_condattr_t condattr; + pthread_condattr_init(&condattr); + pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC); + pthread_cond_init(&device->queue_submit, NULL); + pthread_condattr_destroy(&condattr); + anv_bo_pool_init(&device->batch_bo_pool, device); anv_block_pool_init(&device->dynamic_state_block_pool, device, 16384); @@ -1141,6 +1147,11 @@ VkResult anv_QueueSubmit( result = anv_device_execbuf(device, &fence->execbuf, &fence_bo); if (result != VK_SUCCESS) goto out; + + /* Update the fence and wake up any waiters */ + assert(fence->state == ANV_FENCE_STATE_RESET); + fence->state = ANV_FENCE_STATE_SUBMITTED; + pthread_cond_broadcast(&device->queue_submit); } out: @@ -1518,7 +1529,7 @@ VkResult anv_CreateFence( fence->execbuf.rsvd1 = device->context_id; fence->execbuf.rsvd2 = 0; - fence->ready = false; + fence->state = ANV_FENCE_STATE_RESET; *pFence = anv_fence_to_handle(fence); @@ -1544,7 +1555,7 @@ VkResult anv_ResetFences( { for (uint32_t i = 0; i < fenceCount; i++) { ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); - fence->ready = false; + fence->state = ANV_FENCE_STATE_RESET; } return VK_SUCCESS; @@ -1559,26 +1570,41 @@ VkResult anv_GetFenceStatus( int64_t t = 0; int ret; - if (fence->ready) - return VK_SUCCESS; + switch (fence->state) { + case ANV_FENCE_STATE_RESET: + /* If it hasn't even been sent off to the GPU yet, it's not ready */ + return VK_NOT_READY; - ret = 
anv_gem_wait(device, fence->bo.gem_handle, &t); - if (ret == 0) { - fence->ready = true; + case ANV_FENCE_STATE_SIGNALED: + /* It's been signaled, return success */ return VK_SUCCESS; - } - return VK_NOT_READY; + case ANV_FENCE_STATE_SUBMITTED: + /* It's been submitted to the GPU but we don't know if it's done yet. */ + ret = anv_gem_wait(device, fence->bo.gem_handle, &t); + if (ret == 0) { + fence->state = ANV_FENCE_STATE_SIGNALED; + return VK_SUCCESS; + } else { + return VK_NOT_READY; + } + default: + unreachable("Invalid fence status"); + } } +#define NSEC_PER_SEC 1000000000 +#define INT_TYPE_MAX(type) ((1ull << (sizeof(type) * 8 - 1)) - 1) + VkResult anv_WaitForFences( VkDevice _device, uint32_t fenceCount, const VkFence* pFences, VkBool32 waitAll, - uint64_t timeout) + uint64_t _timeout) { ANV_FROM_HANDLE(anv_device, device, _device); + int ret; /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed * to block indefinitely timeouts <= 0. Unfortunately, this was broken @@ -1587,22 +1613,107 @@ VkResult anv_WaitForFences( * best we can do is to clamp the timeout to INT64_MAX. This limits the * maximum timeout from 584 years to 292 years - likely not a big deal. */ - if (timeout > INT64_MAX) - timeout = INT64_MAX; - - int64_t t = timeout; + int64_t timeout = MIN2(_timeout, INT64_MAX); + + uint32_t pending_fences = fenceCount; + while (pending_fences) { + pending_fences = 0; + bool signaled_fences = false; + for (uint32_t i = 0; i < fenceCount; i++) { + ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); + switch (fence->state) { + case ANV_FENCE_STATE_RESET: + /* This fence hasn't been submitted yet, we'll catch it the next + * time around. Yes, this may mean we dead-loop but, short of + * lots of locking and a condition variable, there's not much that + * we can do about that. + */ + pending_fences++; + continue; + + case ANV_FENCE_STATE_SIGNALED: + /* This fence is not pending. If waitAll isn't set, we can return + * early. 
Otherwise, we have to keep going. + */ + if (!waitAll) + return VK_SUCCESS; + continue; + + case ANV_FENCE_STATE_SUBMITTED: + /* These are the fences we really care about. Go ahead and wait + * on it until we hit a timeout. + */ + ret = anv_gem_wait(device, fence->bo.gem_handle, &timeout); + if (ret == -1 && errno == ETIME) { + return VK_TIMEOUT; + } else if (ret == -1) { + /* We don't know the real error. */ + return vk_errorf(VK_ERROR_DEVICE_LOST, "gem wait failed: %m"); + } else { + fence->state = ANV_FENCE_STATE_SIGNALED; + signaled_fences = true; + if (!waitAll) + return VK_SUCCESS; + continue; + } + } + } - /* FIXME: handle !waitAll */ + if (pending_fences && !signaled_fences) { + /* If we've hit this then someone decided to vkWaitForFences before + * they've actually submitted any of them to a queue. This is a + * fairly pessimal case, so it's ok to lock here and use a standard + * pthreads condition variable. + */ + pthread_mutex_lock(&device->mutex); + + /* It's possible that some of the fences have changed state since the + * last time we checked. Now that we have the lock, check for + * pending fences again and don't wait if it's changed. 
+ */ + uint32_t now_pending_fences = 0; + for (uint32_t i = 0; i < fenceCount; i++) { + ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); + if (fence->state == ANV_FENCE_STATE_RESET) + now_pending_fences++; + } + assert(now_pending_fences <= pending_fences); + + if (now_pending_fences == pending_fences) { + struct timespec before; + clock_gettime(CLOCK_MONOTONIC, &before); + + uint32_t abs_nsec = before.tv_nsec + timeout % NSEC_PER_SEC; + uint64_t abs_sec = before.tv_sec + (abs_nsec / NSEC_PER_SEC) + + (timeout / NSEC_PER_SEC); + abs_nsec %= NSEC_PER_SEC; + + /* Avoid roll-over in tv_sec on 32-bit systems if the user + * provided timeout is UINT64_MAX + */ + struct timespec abstime; + abstime.tv_nsec = abs_nsec; + abstime.tv_sec = MIN2(abs_sec, INT_TYPE_MAX(abstime.tv_sec)); + + ret = pthread_cond_timedwait(&device->queue_submit, + &device->mutex, &abstime); + assert(ret != EINVAL); + + struct timespec after; + clock_gettime(CLOCK_MONOTONIC, &after); + uint64_t time_elapsed = + ((uint64_t)after.tv_sec * NSEC_PER_SEC + after.tv_nsec) - + ((uint64_t)before.tv_sec * NSEC_PER_SEC + before.tv_nsec); + + if (time_elapsed >= timeout) { + pthread_mutex_unlock(&device->mutex); + return VK_TIMEOUT; + } + + timeout -= time_elapsed; + } - for (uint32_t i = 0; i < fenceCount; i++) { - ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); - int ret = anv_gem_wait(device, fence->bo.gem_handle, &t); - if (ret == -1 && errno == ETIME) { - return VK_TIMEOUT; - } else if (ret == -1) { - /* We don't know the real error. 
*/ - return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, - "gem wait failed: %m"); + pthread_mutex_unlock(&device->mutex); } } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 31b4766..06cdc0a 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -577,6 +577,7 @@ struct anv_device { uint32_t default_mocs; pthread_mutex_t mutex; + pthread_cond_t queue_submit; }; void anv_device_get_cache_uuid(void *uuid); @@ -1251,11 +1252,23 @@ anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer); void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); +enum anv_fence_state { + /** Indicates that this is a new (or newly reset fence) */ + ANV_FENCE_STATE_RESET, + + /** Indicates that this fence has been submitted to the GPU but is still + * (as far as we know) in use by the GPU. + */ + ANV_FENCE_STATE_SUBMITTED, + + ANV_FENCE_STATE_SIGNALED, +}; + struct anv_fence { struct anv_bo bo; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 exec2_objects[1]; - bool ready; + enum anv_fence_state state; }; struct anv_event { diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c index 61d5db0..b95e965 100644 --- a/src/intel/vulkan/anv_wsi.c +++ b/src/intel/vulkan/anv_wsi.c @@ -334,7 +334,7 @@ VkResult anv_AcquireNextImageKHR( /* Thanks to implicit sync, the image is ready immediately. 
*/ if (fence) - fence->ready = true; + fence->state = ANV_FENCE_STATE_SIGNALED; return result; } -- cgit v1.1 From 0a2c318d9c48bce0d5865be69c008631a5f98e87 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 9 Nov 2016 10:20:31 -0800 Subject: vulkan/wsi/wayland: Include pthread.h We use pthreads and, for some reason, it wasn't getting included Signed-off-by: Jason Ekstrand Cc: "13.0" (cherry picked from commit 3b6abfc69ac485006cbedba7bcad234888cad44f) --- src/vulkan/wsi/wsi_common_wayland.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/vulkan/wsi/wsi_common_wayland.c b/src/vulkan/wsi/wsi_common_wayland.c index 196ee28..79f7e96 100644 --- a/src/vulkan/wsi/wsi_common_wayland.c +++ b/src/vulkan/wsi/wsi_common_wayland.c @@ -30,6 +30,7 @@ #include #include #include +#include <pthread.h> #include "wsi_common_wayland.h" -- cgit v1.1 From a4b67f664e6a52898e681b35ca769e1fd206a4d1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 9 Nov 2016 10:21:03 -0800 Subject: vulkan/wsi/wayland: Clean up some error handling paths This gets rid of all the memory leaks reported by the WSI CTS tests. 
Signed-off-by: Jason Ekstrand Reviewed-by: Dave Airlie Cc: "13.0" (cherry picked from commit 302f641d14f5c4d1560b6a0170803e21bd4bb976) --- src/vulkan/wsi/wsi_common_wayland.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/vulkan/wsi/wsi_common_wayland.c b/src/vulkan/wsi/wsi_common_wayland.c index 79f7e96..a61b74d 100644 --- a/src/vulkan/wsi/wsi_common_wayland.c +++ b/src/vulkan/wsi/wsi_common_wayland.c @@ -322,6 +322,8 @@ wsi_wl_get_display(struct wsi_device *wsi_device, pthread_mutex_unlock(&wsi->mutex); struct wsi_wl_display *display = wsi_wl_display_create(wsi, wl_display); + if (!display) + return NULL; pthread_mutex_lock(&wsi->mutex); @@ -398,6 +400,8 @@ wsi_wl_surface_get_formats(VkIcdSurfaceBase *icd_surface, VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface; struct wsi_wl_display *display = wsi_wl_get_display(wsi_device, surface->display); + if (!display) + return VK_ERROR_OUT_OF_HOST_MEMORY; uint32_t count = u_vector_length(&display->formats); @@ -827,6 +831,10 @@ wsi_wl_finish_wsi(struct wsi_device *wsi_device, (struct wsi_wayland *)wsi_device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; if (wsi) { + struct hash_entry *entry; + hash_table_foreach(wsi->displays, entry) + wsi_wl_display_destroy(wsi, entry->data); + _mesa_hash_table_destroy(wsi->displays, NULL); pthread_mutex_destroy(&wsi->mutex); -- cgit v1.1 From dfd6b765ba25a3f9a40abbb85111e1369495beb7 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 8 Nov 2016 11:06:05 -0800 Subject: glcpp: Handle '#version 0' and other invalid values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The #version directive can only handle decimal constants. Enforce that the value is a decimal constant. Section 3.3 (Preprocessor) of the GLSL 4.50 spec says: The language version a shader is written to is specified by #version number profile opt where number must be a version of the language, following the same convention as __VERSION__ above. 
The same section also says: __VERSION__ will substitute a decimal integer reflecting the version number of the OpenGL shading language. Use a separate flag to track whether or not the #version line has been encountered. Any possible sentinel (0 is currently used) could be specified in a #version directive. This would lead to trying to (internally) redefine __VERSION__. Since there is no parser location for this addition, NULL is passed. This eventually results in a NULL dereference and a segfault. Attempts to use -1 as the sentinel would also fail if '#version 4294967295' or '#version 18446744073709551615' were used. We should have piglit tests for both of these. Signed-off-by: Ian Romanick Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97420 Reviewed-by: Nicolai Hähnle Cc: mesa-stable@lists.freedesktop.org Cc: Juan A. Suarez Romero Cc: Karol Herbst (cherry picked from commit e85a747e294762785df2ce8a299c153254c6fca2) --- src/compiler/glsl/glcpp/glcpp-parse.y | 25 +++++++++++++++++++------ src/compiler/glsl/glcpp/glcpp.h | 9 +++++++++ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y index 4fd1448..7656325 100644 --- a/src/compiler/glsl/glcpp/glcpp-parse.y +++ b/src/compiler/glsl/glcpp/glcpp-parse.y @@ -176,7 +176,7 @@ add_builtin_define(glcpp_parser_t *parser, const char *name, int value); * (such as the and start conditions in the lexer). 
*/ %token DEFINED ELIF_EXPANDED HASH_TOKEN DEFINE_TOKEN FUNC_IDENTIFIER OBJ_IDENTIFIER ELIF ELSE ENDIF ERROR_TOKEN IF IFDEF IFNDEF LINE PRAGMA UNDEF VERSION_TOKEN GARBAGE IDENTIFIER IF_EXPANDED INTEGER INTEGER_STRING LINE_EXPANDED NEWLINE OTHER PLACEHOLDER SPACE PLUS_PLUS MINUS_MINUS %token PASTE -%type INTEGER operator SPACE integer_constant +%type INTEGER operator SPACE integer_constant version_constant %type expression %type IDENTIFIER FUNC_IDENTIFIER OBJ_IDENTIFIER INTEGER_STRING OTHER ERROR_TOKEN PRAGMA %type identifier_list @@ -424,14 +424,14 @@ control_line_success: | HASH_TOKEN ENDIF { _glcpp_parser_skip_stack_pop (parser, & @1); } NEWLINE -| HASH_TOKEN VERSION_TOKEN integer_constant NEWLINE { - if (parser->version != 0) { +| HASH_TOKEN VERSION_TOKEN version_constant NEWLINE { + if (parser->version_set) { glcpp_error(& @1, parser, "#version must appear on the first line"); } _glcpp_parser_handle_version_declaration(parser, $3, NULL, true); } -| HASH_TOKEN VERSION_TOKEN integer_constant IDENTIFIER NEWLINE { - if (parser->version != 0) { +| HASH_TOKEN VERSION_TOKEN version_constant IDENTIFIER NEWLINE { + if (parser->version_set) { glcpp_error(& @1, parser, "#version must appear on the first line"); } _glcpp_parser_handle_version_declaration(parser, $3, $4, true); @@ -470,6 +470,17 @@ integer_constant: $$ = $1; } +version_constant: + INTEGER_STRING { + /* Both octal and hexadecimal constants begin with 0. 
*/ + if ($1[0] == '0' && $1[1] != '\0') { + glcpp_error(&@1, parser, "invalid #version \"%s\" (not a decimal constant)", $1); + $$ = 0; + } else { + $$ = strtoll($1, NULL, 10); + } + } + expression: integer_constant { $$.value = $1; @@ -1376,6 +1387,7 @@ glcpp_parser_create(glcpp_extension_iterator extensions, void *state, gl_api api parser->state = state; parser->api = api; parser->version = 0; + parser->version_set = false; parser->has_new_line_number = 0; parser->new_line_number = 1; @@ -2318,10 +2330,11 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio const char *es_identifier, bool explicitly_set) { - if (parser->version != 0) + if (parser->version_set) return; parser->version = version; + parser->version_set = true; add_builtin_define (parser, "__VERSION__", version); diff --git a/src/compiler/glsl/glcpp/glcpp.h b/src/compiler/glsl/glcpp/glcpp.h index cab4374..fcee812 100644 --- a/src/compiler/glsl/glcpp/glcpp.h +++ b/src/compiler/glsl/glcpp/glcpp.h @@ -207,6 +207,15 @@ struct glcpp_parser { void *state; gl_api api; unsigned version; + + /** + * Has the #version been set? + * + * A separate flag is used because any possible sentinel value in + * \c ::version could also be set by a #version line. + */ + bool version_set; + bool has_new_line_number; int new_line_number; bool has_new_source_number; -- cgit v1.1 From 953030bbb3635605b1ce8e5369ab6722b8602543 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 8 Nov 2016 11:14:49 -0800 Subject: glsl: Parse 0 as a preprocessor INTCONSTANT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows a more reasonable error message for '#version 0' of 0:1(10): error: GLSL 0.00 is not supported. 
Supported versions are: 1.10, 1.20, 1.30, 1.00 ES, 3.00 ES, 3.10 ES, and 3.20 ES instead of 0:1(10): error: syntax error, unexpected $undefined, expecting INTCONSTANT Signed-off-by: Ian Romanick Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97420 Reviewed-by: Nicolai Hähnle Cc: mesa-stable@lists.freedesktop.org Cc: Juan A. Suarez Romero Cc: Karol Herbst (cherry picked from commit c8c46641af43edd106528ac0293db5aa02a2364e) --- src/compiler/glsl/glsl_lexer.ll | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/compiler/glsl/glsl_lexer.ll b/src/compiler/glsl/glsl_lexer.ll index d5e5d4c..450faeb 100644 --- a/src/compiler/glsl/glsl_lexer.ll +++ b/src/compiler/glsl/glsl_lexer.ll @@ -253,6 +253,10 @@ HASH ^{SPC}#{SPC} yylval->n = strtol(yytext, NULL, 10); return INTCONSTANT; } +0 { + yylval->n = 0; + return INTCONSTANT; + } \n { BEGIN 0; yylineno++; yycolumn = 0; return EOL; } . { return yytext[0]; } -- cgit v1.1 From 6520a64c4dadf03e2991a997c2399e3cc181b5c2 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 10 Nov 2016 10:32:08 +1000 Subject: radv: fix texturesamples to handle single sample case We can only read the valid samples if this is an MSAA texture, which means the type field must be 0x14 or 0x15. 
This fixes: dEQP-VK.glsl.texture_functions.query.texturesamples.* Cc: "13.0" Reviewed-by: Bas Nieuwenhuizen Signed-off-by: Dave Airlie (cherry picked from commit 2de85eb97ab2ef45ec23f694a566cd0ec8192885) --- src/amd/common/ac_nir_to_llvm.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index f235cc2..d76f3fc 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -3299,17 +3299,25 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) } if (instr->op == nir_texop_texture_samples) { - LLVMValueRef res, samples; + LLVMValueRef res, samples, is_msaa; res = LLVMBuildBitCast(ctx->builder, res_ptr, ctx->v8i32, ""); samples = LLVMBuildExtractElement(ctx->builder, res, LLVMConstInt(ctx->i32, 3, false), ""); + is_msaa = LLVMBuildLShr(ctx->builder, samples, + LLVMConstInt(ctx->i32, 28, false), ""); + is_msaa = LLVMBuildAnd(ctx->builder, is_msaa, + LLVMConstInt(ctx->i32, 0xe, false), ""); + is_msaa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, is_msaa, + LLVMConstInt(ctx->i32, 0xe, false), ""); + samples = LLVMBuildLShr(ctx->builder, samples, LLVMConstInt(ctx->i32, 16, false), ""); samples = LLVMBuildAnd(ctx->builder, samples, LLVMConstInt(ctx->i32, 0xf, false), ""); samples = LLVMBuildShl(ctx->builder, ctx->i32one, samples, ""); - + samples = LLVMBuildSelect(ctx->builder, is_msaa, samples, + ctx->i32one, ""); result = samples; goto write_result; } -- cgit v1.1 From cf8b11fc6ce5618117bb48aca108ea448438a926 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 4 Nov 2016 15:42:48 -0700 Subject: vulkan/wsi: Report the correct min/maxImageCount MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From the Vulkan spec 1.0.32 section 29.6 docs for vkAcquireNextImageKHR: "Let n be the total number of images in the swapchain, m be the value of VkSurfaceCapabilitiesKHR::minImageCount, and a be the 
number of presentable images that the application has currently acquired (i.e. images acquired with vkAcquireNextImageKHR, but not yet presented with vkQueuePresentKHR). vkAcquireNextImageKHR can always succeed if a ≤ n - m at the time vkAcquireNextImageKHR is called. vkAcquireNextImageKHR should not be called if a > n - m with a timeout of UINT64_MAX; in such a case, vkAcquireNextImageKHR may block indefinitely." With minImageCount == 2 (as it was previously), the client is allowed to acquire all but one image without blocking. If we really need 4 images for mailbox mode + pageflipping, then we need to request a minimum of 4 images up-front. This is a bit unfortunate because it means we will always consume 4 images. In the future, we may be able to optimize this a bit by waiting until the server starts to flip and returning OUT_OF_DATE to get the client to re-allocate with more images or something like that. Signed-off-by: Jason Ekstrand Reviewed-by: Dave Airlie Cc: "13.0" (cherry picked from commit 4fa0ca80eeeac813affcbb0129ed61f1534d8df0) --- src/vulkan/wsi/wsi_common_wayland.c | 25 ++++++++++--------------- src/vulkan/wsi/wsi_common_x11.c | 21 ++++++++++----------- 2 files changed, 20 insertions(+), 26 deletions(-) diff --git a/src/vulkan/wsi/wsi_common_wayland.c b/src/vulkan/wsi/wsi_common_wayland.c index a61b74d..d28c430 100644 --- a/src/vulkan/wsi/wsi_common_wayland.c +++ b/src/vulkan/wsi/wsi_common_wayland.c @@ -42,8 +42,6 @@ memcpy((dest), (src), (count) * sizeof(*(src))); \ }) -#define MIN_NUM_IMAGES 2 - struct wsi_wayland; struct wsi_wl_display { @@ -369,8 +367,16 @@ static VkResult wsi_wl_surface_get_capabilities(VkIcdSurfaceBase *surface, VkSurfaceCapabilitiesKHR* caps) { - caps->minImageCount = MIN_NUM_IMAGES; - caps->maxImageCount = 4; + /* For true mailbox mode, we need at least 4 images: + * 1) One to scan out from + * 2) One to have queued for scan-out + * 3) One to be currently held by the Wayland compositor + * 4) One to render to + */ + 
caps->minImageCount = 4; + /* There is no real maximum */ + caps->maxImageCount = 0; + caps->currentExtent = (VkExtent2D) { -1, -1 }; caps->minImageExtent = (VkExtent2D) { 1, 1 }; caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX }; @@ -690,17 +696,6 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, int num_images = pCreateInfo->minImageCount; - assert(num_images >= MIN_NUM_IMAGES); - - /* For true mailbox mode, we need at least 4 images: - * 1) One to scan out from - * 2) One to have queued for scan-out - * 3) One to be currently held by the Wayland compositor - * 4) One to render to - */ - if (pCreateInfo->presentMode == VK_PRESENT_MODE_MAILBOX_KHR) - num_images = MAX2(num_images, 4); - size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); chain = vk_alloc(pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); diff --git a/src/vulkan/wsi/wsi_common_x11.c b/src/vulkan/wsi/wsi_common_x11.c index 2280651..de71b19 100644 --- a/src/vulkan/wsi/wsi_common_x11.c +++ b/src/vulkan/wsi/wsi_common_x11.c @@ -371,8 +371,16 @@ x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface, VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; } + /* For true mailbox mode, we need at least 4 images: + * 1) One to scan out from + * 2) One to have queued for scan-out + * 3) One to be currently held by the X server + * 4) One to render to + */ caps->minImageCount = 2; - caps->maxImageCount = 4; + /* There is no real maximum */ + caps->maxImageCount = 0; + caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; caps->maxImageArrayLayers = 1; @@ -737,16 +745,7 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR); - int num_images = pCreateInfo->minImageCount; - - /* For true mailbox mode, we need at least 4 images: - * 1) One to scan out from - * 2) One to have queued for scan-out - * 3) One to be currently 
held by the Wayland compositor - * 4) One to render to - */ - if (pCreateInfo->presentMode == VK_PRESENT_MODE_MAILBOX_KHR) - num_images = MAX2(num_images, 4); + const unsigned num_images = pCreateInfo->minImageCount; size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]); chain = vk_alloc(pAllocator, size, 8, -- cgit v1.1 From 9d5c3fc12b05d944508ef4e3b1f2ddc4f23c0a82 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 10 Nov 2016 22:36:39 -0800 Subject: i965/gs: Allow primitive id to be a system value This allows for gl_PrimitiveId to come in as a system value rather than as an input. This is the way it will come in from SPIR-V. We keep the input path working for now so we don't break GL. Reviewed-by: Kenneth Graunke Cc: "13.0" (cherry picked from commit a5e88e66e633aaeb587b274d80e21cd46c8ee2cb) [Emil Velikov: nir_shader::info is not a pointer in branch] Signed-off-by: Emil Velikov Conflicts: src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp --- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 59c7d21..b0ee289 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -626,7 +626,8 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, shader = brw_postprocess_nir(shader, compiler->devinfo, is_scalar); prog_data->include_primitive_id = - (shader->info.inputs_read & VARYING_BIT_PRIMITIVE_ID) != 0; + (shader->info.inputs_read & VARYING_BIT_PRIMITIVE_ID) || + (shader->info.system_values_read & (1 << SYSTEM_VALUE_PRIMITIVE_ID)); prog_data->invocations = shader->info.gs.invocations; -- cgit v1.1 From e3fe51dbeee6f9d7520e99ac83efcc57646a2253 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Sat, 5 Nov 2016 15:28:37 +0100 Subject: Fix races during _mesa_HashWalk(). 
There is currently no protection against walking a hash (using _mesa_HashWalk()) and modifying it at the same time, for instance by inserting or deleting elements. This leads to segfaults in multithreaded code if e.g. someone calls glTexImage2D (which may have to walk the list of FBOs) while another thread is calling glDeleteFramebuffers on another thread with the two contexts sharing lists. The reason for this is that _mesa_HashWalk() doesn't actually take the mutex that normally protects the hash; it takes an entirely different mutex. Thus, walks are only protected against other walks, and there is also no outer lock taking this. There is an old comment saying that this is to fix problems with deadlock if the callback needs to take a mutex; we solve this by changing the mutex to be recursive. A demonstration Helgrind hit from a real application: ==13412== Possible data race during write of size 8 at 0x3498C6A8 by thread #1 ==13412== Locks held: 2, at addresses 0x1AF09530 0x2B3DF400 ==13412== at 0x1F040C99: _mesa_hash_table_remove (hash_table.c:395) ==13412== by 0x1EE98174: _mesa_HashRemove_unlocked (hash.c:350) ==13412== by 0x1EE98174: _mesa_HashRemove (hash.c:365) ==13412== by 0x1EE2372D: _mesa_DeleteFramebuffers (fbobject.c:2669) ==13412== by 0x6105AA4: movit::ResourcePool::cleanup_unlinked_fbos(void*) (resource_pool.cpp:473) ==13412== by 0x610615B: movit::ResourcePool::release_fbo(unsigned int) (resource_pool.cpp:442) [...] 
==13412== This conflicts with a previous read of size 8 by thread #20 ==13412== Locks held: 2, at addresses 0x1AF09558 0x1AF73318 ==13412== at 0x1F040CD9: _mesa_hash_table_next_entry (hash_table.c:415) ==13412== by 0x1EE982A8: _mesa_HashWalk (hash.c:426) ==13412== by 0x1EED6DFD: _mesa_update_fbo_texture.part.33 (teximage.c:2683) ==13412== by 0x1EED9410: _mesa_update_fbo_texture (teximage.c:3043) ==13412== by 0x1EED9410: teximage (teximage.c:3073) ==13412== by 0x1EEDA28F: _mesa_TexImage2D (teximage.c:3105) ==13412== by 0x166A68: operator() (mixer.cpp:454) There are many more interactions than just these two possible. Cc: 11.2 12.0 13.0 Signed-off-by: Steinar H. Gunderson Reviewed-by: Timothy Arceri (cherry picked from commit 2e2562cabbe9a1d3fb997ccaccc20ba31b2006c3) --- src/mesa/main/hash.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/mesa/main/hash.c b/src/mesa/main/hash.c index 7d8a5fd..670438a 100644 --- a/src/mesa/main/hash.c +++ b/src/mesa/main/hash.c @@ -59,7 +59,6 @@ struct _mesa_HashTable { struct hash_table *ht; GLuint MaxKey; /**< highest key inserted so far */ mtx_t Mutex; /**< mutual exclusion lock */ - mtx_t WalkMutex; /**< for _mesa_HashWalk() */ GLboolean InDeleteAll; /**< Debug check */ /** Value that would be in the table for DELETED_KEY_VALUE. */ void *deleted_key_data; @@ -129,8 +128,11 @@ _mesa_NewHashTable(void) } _mesa_hash_table_set_deleted_key(table->ht, uint_key(DELETED_KEY_VALUE)); - mtx_init(&table->Mutex, mtx_plain); - mtx_init(&table->WalkMutex, mtx_plain); + /* + * Needs to be recursive, since the callback in _mesa_HashWalk() + * is allowed to call _mesa_HashRemove(). 
+ */ + mtx_init(&table->Mutex, mtx_recursive); } else { _mesa_error_no_memory(__func__); @@ -161,7 +163,6 @@ _mesa_DeleteHashTable(struct _mesa_HashTable *table) _mesa_hash_table_destroy(table->ht, NULL); mtx_destroy(&table->Mutex); - mtx_destroy(&table->WalkMutex); free(table); } @@ -401,11 +402,6 @@ _mesa_HashDeleteAll(struct _mesa_HashTable *table, /** * Walk over all entries in a hash table, calling callback function for each. - * Note: we use a separate mutex in this function to avoid a recursive - * locking deadlock (in case the callback calls _mesa_HashRemove()) and to - * prevent multiple threads/contexts from getting tangled up. - * A lock-less version of this function could be used when the table will - * not be modified. * \param table the hash table to walk * \param callback the callback function * \param userData arbitrary pointer to pass along to the callback @@ -422,13 +418,13 @@ _mesa_HashWalk(const struct _mesa_HashTable *table, assert(table); assert(callback); - mtx_lock(&table2->WalkMutex); + mtx_lock(&table2->Mutex); hash_table_foreach(table->ht, entry) { callback((uintptr_t)entry->key, entry->data, userData); } if (table->deleted_key_data) callback(DELETED_KEY_VALUE, table->deleted_key_data, userData); - mtx_unlock(&table2->WalkMutex); + mtx_unlock(&table2->Mutex); } static void -- cgit v1.1 From fd5fe00f7bb672cad9da19329acac88dcc2086f7 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 11 Nov 2016 14:04:42 -0800 Subject: vc4: Fix register class handling of DDX/DDY arguments. I had this exactly backwards, but apparently the piglit tests were all landing in r0-r3 anyway. 
Cc: "13.0" (cherry picked from commit 977d8b526b983c8d19df00af224033389f8ab7c8) --- src/gallium/drivers/vc4/vc4_register_allocate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c index 6c99b05..ab343ee 100644 --- a/src/gallium/drivers/vc4/vc4_register_allocate.c +++ b/src/gallium/drivers/vc4/vc4_register_allocate.c @@ -247,7 +247,7 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) case QOP_ROT_MUL: assert(inst->src[0].file == QFILE_TEMP); - class_bits[inst->src[0].index] &= ~CLASS_BIT_R0_R3; + class_bits[inst->src[0].index] &= CLASS_BIT_R0_R3; break; default: -- cgit v1.1 From 4b2caa02f048d73b2e7dc27b9ad363e0f29a5d02 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 14 Nov 2016 17:26:09 +0000 Subject: anv: fix multi level clears with VK_REMAINING_MIP_LEVELS A commit from the CTS suite on the 1.0-dev branch started using VK_REMAINING_MIP_LEVELS, we're not dealing with it properly for clears. 
Fixes: dEQP-VK.api.image_clearing.clear_color_image.* Signed-off-by: Lionel Landwerlin Reviewed-by: Jason Ekstrand Cc: "13.0" (cherry picked from commit a46bc3f70a22a71dc2977f7394841e1b19bb68b6) --- src/intel/vulkan/anv_blorp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 87f242c..73882445 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -787,7 +787,7 @@ void anv_CmdClearColorImage( unsigned base_layer = pRanges[r].baseArrayLayer; unsigned layer_count = pRanges[r].layerCount; - for (unsigned i = 0; i < pRanges[r].levelCount; i++) { + for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) { const unsigned level = pRanges[r].baseMipLevel + i; const unsigned level_width = anv_minify(image->extent.width, level); const unsigned level_height = anv_minify(image->extent.height, level); @@ -847,7 +847,7 @@ void anv_CmdClearDepthStencilImage( unsigned base_layer = pRanges[r].baseArrayLayer; unsigned layer_count = pRanges[r].layerCount; - for (unsigned i = 0; i < pRanges[r].levelCount; i++) { + for (unsigned i = 0; i < anv_get_levelCount(image, &pRanges[r]); i++) { const unsigned level = pRanges[r].baseMipLevel + i; const unsigned level_width = anv_minify(image->extent.width, level); const unsigned level_height = anv_minify(image->extent.height, level); -- cgit v1.1 From 607cac69f895836b7633042b52bf7d6b6c3b23c9 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 15 Nov 2016 00:15:02 -0800 Subject: intel: Set min_ds_entries on Broxton. This was missing. 
Cc: mesa-stable@lists.freedesktop.org Signed-off-by: Kenneth Graunke Reviewed-by: Ben Widawsky (cherry picked from commit 341fc0073a3c05fd43e9c7a33613bcb881f25f33) --- src/intel/common/gen_device_info.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/intel/common/gen_device_info.c b/src/intel/common/gen_device_info.c index 1dc1769..c7b8e56 100644 --- a/src/intel/common/gen_device_info.c +++ b/src/intel/common/gen_device_info.c @@ -391,6 +391,7 @@ static const struct gen_device_info gen_device_info_bxt = { .urb = { .size = 192, .min_vs_entries = 34, + .min_ds_entries = 34, .max_vs_entries = 704, .max_tcs_entries = 256, .max_tes_entries = 416, @@ -413,6 +414,7 @@ static const struct gen_device_info gen_device_info_bxt_2x6 = { .urb = { .size = 128, .min_vs_entries = 34, + .min_ds_entries = 34, .max_vs_entries = 352, .max_tcs_entries = 128, .max_tes_entries = 208, -- cgit v1.1 From 154cb647218999bdc1b2535ffdc85baf933e718e Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Tue, 15 Nov 2016 02:18:25 -0800 Subject: isl: Fix height calculation in isl_msaa_interleaved_scale_px_to_sa No known fixed tests, but it looks like a typo from: commit 8ac99eabb6570f0f3c5f7d7da1332a99ce636362 intel/isl: Add a helper for getting the size of an interleaved pixel Cc: "13.0" Signed-off-by: Jordan Justen Reviewed-by: Jason Ekstrand (cherry picked from commit 0ac57afa6fbe59e9fd8eef38365cb3da8ec67f95) --- src/intel/isl/isl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index 7831c5e..32463b1 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -339,7 +339,7 @@ isl_msaa_interleaved_scale_px_to_sa(uint32_t samples, if (width) *width = isl_align(*width, 2) * px_size_sa.width; if (height) - *height = isl_align(*height, 2) * px_size_sa.width; + *height = isl_align(*height, 2) * px_size_sa.height; } static enum isl_array_pitch_span -- cgit v1.1 From a3f628ca25fc64022a27287f18e8c7fbe20c7e56 Mon Sep 17 00:00:00 2001 
From: Dave Airlie Date: Tue, 15 Nov 2016 21:18:50 +0000 Subject: wsi: fix VK_INCOMPLETE for vkGetSwapchainImagesKHR This fixes the x11 and wayland backends to not assert: dEQP-VK.wsi.xcb.swapchain.get_images.incomplete Reviewed-by: Jason Ekstrand Cc: "13.0" Signed-off-by: Dave Airlie (cherry picked from commit 253fa25d09b77e18f736b97da07d57be0e6c4200) --- src/vulkan/wsi/wsi_common_wayland.c | 16 +++++++++++----- src/vulkan/wsi/wsi_common_x11.c | 16 +++++++++++----- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/vulkan/wsi/wsi_common_wayland.c b/src/vulkan/wsi/wsi_common_wayland.c index d28c430..a8130ce 100644 --- a/src/vulkan/wsi/wsi_common_wayland.c +++ b/src/vulkan/wsi/wsi_common_wayland.c @@ -498,19 +498,25 @@ wsi_wl_swapchain_get_images(struct wsi_swapchain *wsi_chain, uint32_t *pCount, VkImage *pSwapchainImages) { struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)wsi_chain; + uint32_t ret_count; + VkResult result; if (pSwapchainImages == NULL) { *pCount = chain->image_count; return VK_SUCCESS; } - assert(chain->image_count <= *pCount); - for (uint32_t i = 0; i < chain->image_count; i++) - pSwapchainImages[i] = chain->images[i].image; + result = VK_SUCCESS; + ret_count = chain->image_count; + if (chain->image_count > *pCount) { + ret_count = *pCount; + result = VK_INCOMPLETE; + } - *pCount = chain->image_count; + for (uint32_t i = 0; i < ret_count; i++) + pSwapchainImages[i] = chain->images[i].image; - return VK_SUCCESS; + return result; } static VkResult diff --git a/src/vulkan/wsi/wsi_common_x11.c b/src/vulkan/wsi/wsi_common_x11.c index de71b19..73bd03c 100644 --- a/src/vulkan/wsi/wsi_common_x11.c +++ b/src/vulkan/wsi/wsi_common_x11.c @@ -498,19 +498,25 @@ x11_get_images(struct wsi_swapchain *anv_chain, uint32_t* pCount, VkImage *pSwapchainImages) { struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain; + uint32_t ret_count; + VkResult result; if (pSwapchainImages == NULL) { *pCount = chain->image_count; return 
VK_SUCCESS; } - assert(chain->image_count <= *pCount); - for (uint32_t i = 0; i < chain->image_count; i++) - pSwapchainImages[i] = chain->images[i].image; + result = VK_SUCCESS; + ret_count = chain->image_count; + if (chain->image_count > *pCount) { + ret_count = *pCount; + result = VK_INCOMPLETE; + } - *pCount = chain->image_count; + for (uint32_t i = 0; i < ret_count; i++) + pSwapchainImages[i] = chain->images[i].image; - return VK_SUCCESS; + return result; } static VkResult -- cgit v1.1 From 7bbe351e49618c35abb34a6b2512f248cd5fb80f Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 15 Nov 2016 20:11:51 +0000 Subject: radv: don't crash on null swapchain destroy. Just return if the passed in swapchain is NULL. Fixes: dEQP-VK.wsi.xlib.swapchain.destroy.null_handle Cc: "13.0" Signed-off-by: Dave Airlie (cherry picked from commit 38ab625c5f5ca06b78af2892422a966dc659f8cd) --- src/amd/vulkan/radv_wsi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c index a946bd4..1f1ab1c 100644 --- a/src/amd/vulkan/radv_wsi.c +++ b/src/amd/vulkan/radv_wsi.c @@ -288,6 +288,9 @@ void radv_DestroySwapchainKHR( RADV_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain); const VkAllocationCallbacks *alloc; + if (!_swapchain) + return; + if (pAllocator) alloc = pAllocator; else -- cgit v1.1 From 145ecf60dd303d4c55949d5c437d1b307e9ba254 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 15 Nov 2016 07:30:09 +0000 Subject: ac/nir/llvm: fix channel in texture gather lowering code. 
This fixes a number of CTS tests like: dEQP-VK.glsl.texture_gather.basic.2d.rgba8ui.size_npot.clamp_to_edge_repeat Cc: "13.0" Signed-off-by: Dave Airlie (cherry picked from commit 713522fb8d4366d29be18edc3d5f33faba1cb7c4) --- src/amd/common/ac_nir_to_llvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index d76f3fc..0b73e06 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1683,7 +1683,7 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx, for (c = 0; c < 2; c++) { half_texel[c] = LLVMBuildExtractElement(ctx->builder, size, - ctx->i32zero, ""); + LLVMConstInt(ctx->i32, c, false), ""); half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, ""); half_texel[c] = emit_fdiv(ctx, ctx->f32one, half_texel[c]); half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c], -- cgit v1.1 From d3de9f5cb9d0c144e1769c8f4ab29b2b684230e3 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 18 Oct 2016 13:32:08 -0700 Subject: i965: Add some APL and KBL SKU strings We got a couple for products that exist on ark.intel.com, so let's just put them in now. 
Signed-off-by: Ben Widawsky (cherry picked from commit b8509c8936bdb3deaeac86e2ee9716c06d4e0865) Squashed with commit: i965: Fix KBL typo in string Signed-off-by: Ben Widawsky Reviewed-by: Anuj Phogat Reviewed-by: Kenneth Graunke (cherry picked from commit 19a01f8139f74d98548c87a0fd3cc2ff9c60b46b) --- include/pci_ids/i965_pci_ids.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h index 1566afd..c871a73 100644 --- a/include/pci_ids/i965_pci_ids.h +++ b/include/pci_ids/i965_pci_ids.h @@ -144,11 +144,11 @@ CHIPSET(0x5913, kbl_gt1_5, "Intel(R) Kabylake GT1.5") CHIPSET(0x5915, kbl_gt1_5, "Intel(R) Kabylake GT1.5") CHIPSET(0x5917, kbl_gt1_5, "Intel(R) Kabylake GT1.5") CHIPSET(0x5912, kbl_gt2, "Intel(R) Kabylake GT2") -CHIPSET(0x5916, kbl_gt2, "Intel(R) Kabylake GT2") +CHIPSET(0x5916, kbl_gt2, "Intel(R) HD Graphics 620 (Kabylake GT2)") CHIPSET(0x591A, kbl_gt2, "Intel(R) Kabylake GT2") CHIPSET(0x591B, kbl_gt2, "Intel(R) Kabylake GT2") CHIPSET(0x591D, kbl_gt2, "Intel(R) Kabylake GT2") -CHIPSET(0x591E, kbl_gt2, "Intel(R) Kabylake GT2") +CHIPSET(0x591E, kbl_gt2, "Intel(R) HD Graphics 615 (Kabylake GT2)") CHIPSET(0x5921, kbl_gt2, "Intel(R) Kabylake GT2F") CHIPSET(0x5923, kbl_gt3, "Intel(R) Kabylake GT3") CHIPSET(0x5926, kbl_gt3, "Intel(R) Kabylake GT3") @@ -161,5 +161,5 @@ CHIPSET(0x22B3, chv, "Intel(R) HD Graphics (Cherryview)") CHIPSET(0x0A84, bxt, "Intel(R) HD Graphics (Broxton)") CHIPSET(0x1A84, bxt, "Intel(R) HD Graphics (Broxton)") CHIPSET(0x1A85, bxt_2x6, "Intel(R) HD Graphics (Broxton 2x6)") -CHIPSET(0x5A84, bxt, "Intel(R) HD Graphics (Broxton)") -CHIPSET(0x5A85, bxt_2x6, "Intel(R) HD Graphics (Broxton 2x6)") +CHIPSET(0x5A84, bxt, "Intel(R) HD Graphics 505 (Broxton)") +CHIPSET(0x5A85, bxt_2x6, "Intel(R) HD Graphics 500 (Broxton 2x6)") -- cgit v1.1 From ee56f5577d7e57ddb3440b51152742182174f390 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 18 Oct 2016 13:50:08 -0700 Subject: 
i965: Reorder PCI ID list to match release order I have some OCD... Signed-off-by: Ben Widawsky Reviewed-by: Dylan Baker (cherry picked from commit ffd9060b2388dcb4bc4d4e564dab23541f641830) --- include/pci_ids/i965_pci_ids.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h index c871a73..dc411da 100644 --- a/include/pci_ids/i965_pci_ids.h +++ b/include/pci_ids/i965_pci_ids.h @@ -109,6 +109,10 @@ CHIPSET(0x162A, bdw_gt3, "Intel(R) Iris Pro P6300 (Broadwell GT3e)") CHIPSET(0x162B, bdw_gt3, "Intel(R) Iris 6100 (Broadwell GT3)") CHIPSET(0x162D, bdw_gt3, "Intel(R) Broadwell GT3") CHIPSET(0x162E, bdw_gt3, "Intel(R) Broadwell GT3") +CHIPSET(0x22B0, chv, "Intel(R) HD Graphics (Cherrytrail)") +CHIPSET(0x22B1, chv, "Intel(R) HD Graphics XXX (Braswell)") /* Overridden in brw_get_renderer_string */ +CHIPSET(0x22B2, chv, "Intel(R) HD Graphics (Cherryview)") +CHIPSET(0x22B3, chv, "Intel(R) HD Graphics (Cherryview)") CHIPSET(0x1902, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)") CHIPSET(0x1906, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)") CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake GT1") @@ -134,6 +138,11 @@ CHIPSET(0x1932, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4e)") CHIPSET(0x193A, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4e)") CHIPSET(0x193B, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4e)") CHIPSET(0x193D, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4e)") +CHIPSET(0x0A84, bxt, "Intel(R) HD Graphics (Broxton)") +CHIPSET(0x1A84, bxt, "Intel(R) HD Graphics (Broxton)") +CHIPSET(0x1A85, bxt_2x6, "Intel(R) HD Graphics (Broxton 2x6)") +CHIPSET(0x5A84, bxt, "Intel(R) HD Graphics 505 (Broxton)") +CHIPSET(0x5A85, bxt_2x6, "Intel(R) HD Graphics 500 (Broxton 2x6)") CHIPSET(0x5902, kbl_gt1, "Intel(R) Kabylake GT1") CHIPSET(0x5906, kbl_gt1, "Intel(R) Kabylake GT1") CHIPSET(0x590A, kbl_gt1, "Intel(R) Kabylake GT1") @@ -154,12 +163,3 @@ 
CHIPSET(0x5923, kbl_gt3, "Intel(R) Kabylake GT3") CHIPSET(0x5926, kbl_gt3, "Intel(R) Kabylake GT3") CHIPSET(0x5927, kbl_gt3, "Intel(R) Kabylake GT3") CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4") -CHIPSET(0x22B0, chv, "Intel(R) HD Graphics (Cherrytrail)") -CHIPSET(0x22B1, chv, "Intel(R) HD Graphics XXX (Braswell)") /* Overridden in brw_get_renderer_string */ -CHIPSET(0x22B2, chv, "Intel(R) HD Graphics (Cherryview)") -CHIPSET(0x22B3, chv, "Intel(R) HD Graphics (Cherryview)") -CHIPSET(0x0A84, bxt, "Intel(R) HD Graphics (Broxton)") -CHIPSET(0x1A84, bxt, "Intel(R) HD Graphics (Broxton)") -CHIPSET(0x1A85, bxt_2x6, "Intel(R) HD Graphics (Broxton 2x6)") -CHIPSET(0x5A84, bxt, "Intel(R) HD Graphics 505 (Broxton)") -CHIPSET(0x5A85, bxt_2x6, "Intel(R) HD Graphics 500 (Broxton 2x6)") -- cgit v1.1 From 045420ea06b6aacdaab3dbf5915c3747093e6506 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 10 Nov 2016 10:20:11 -0800 Subject: i965/glk: Add basic Geminilake support v2: s/bdw/gen; Add the 2x6 config v3: Add min_ds_entries Cc: "13.0" Signed-off-by: Ben Widawsky Reviewed-by: Anuj Phogat Reviewed-by: Kenneth Graunke (cherry picked from commit 2193fb0e1f437b53672a03f74e40d4aebc503f9e) --- include/pci_ids/i965_pci_ids.h | 2 ++ src/intel/common/gen_device_info.c | 46 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h index dc411da..ffcaf33 100644 --- a/include/pci_ids/i965_pci_ids.h +++ b/include/pci_ids/i965_pci_ids.h @@ -163,3 +163,5 @@ CHIPSET(0x5923, kbl_gt3, "Intel(R) Kabylake GT3") CHIPSET(0x5926, kbl_gt3, "Intel(R) Kabylake GT3") CHIPSET(0x5927, kbl_gt3, "Intel(R) Kabylake GT3") CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4") +CHIPSET(0x3184, glk, "Intel(R) HD Graphics (Geminilake)") +CHIPSET(0x3185, glk_2x6, "Intel(R) HD Graphics (Geminilake 2x6)") diff --git a/src/intel/common/gen_device_info.c b/src/intel/common/gen_device_info.c index c7b8e56..51fdf34 100644 --- 
a/src/intel/common/gen_device_info.c +++ b/src/intel/common/gen_device_info.c @@ -475,6 +475,52 @@ static const struct gen_device_info gen_device_info_kbl_gt4 = { .num_slices = 3, }; +static const struct gen_device_info gen_device_info_glk = { + GEN9_FEATURES, + .is_broxton = 1, + .gt = 1, + .has_llc = false, + + .num_slices = 1, + .max_vs_threads = 112, + .max_tcs_threads = 112, + .max_tes_threads = 112, + .max_gs_threads = 112, + .max_cs_threads = 6 * 6, + .urb = { + .size = 192, + .min_vs_entries = 34, + .min_ds_entries = 34, + .max_vs_entries = 704, + .max_tcs_entries = 256, + .max_tes_entries = 416, + .max_gs_entries = 256, + } +}; + +static const struct gen_device_info gen_device_info_glk_2x6 = { + GEN9_FEATURES, + .is_broxton = 1, + .gt = 1, + .has_llc = false, + + .num_slices = 1, + .max_vs_threads = 56, /* XXX: guess */ + .max_tcs_threads = 56, /* XXX: guess */ + .max_tes_threads = 56, + .max_gs_threads = 56, + .max_cs_threads = 6 * 6, + .urb = { + .size = 128, + .min_vs_entries = 34, + .min_ds_entries = 34, + .max_vs_entries = 352, + .max_tcs_entries = 128, + .max_tes_entries = 208, + .max_gs_entries = 128, + } +}; + bool gen_get_device_info(int devid, struct gen_device_info *devinfo) { -- cgit v1.1 From 8dbdbc21910a6d37c381535186f9e728fff8690d Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 10 Nov 2016 21:32:32 -0800 Subject: anv: Handle null in all destructors This fixes a bunch of new CTS tests which look for exactly this. Even in the cases where we just call vk_free to free a CPU data structure, we still handle NULL explicitly. This way we're less likely to forget to handle NULL later should we actually do something less trivial. 
Cc: "13.0" Reviewed-by: Dave Airlie (cherry picked from commit 49f08ad77f51cc344e4bfe60ba9f8d9fccfbd753) [Emil Velikov: color_rt_surface_state is still around] Signed-off-by: Emil Velikov Conflicts: src/intel/vulkan/anv_image.c --- src/intel/vulkan/anv_cmd_buffer.c | 6 ++++++ src/intel/vulkan/anv_descriptor_set.c | 12 ++++++++++++ src/intel/vulkan/anv_device.c | 15 +++++++++++++++ src/intel/vulkan/anv_image.c | 12 +++++++++++- src/intel/vulkan/anv_pass.c | 3 +++ src/intel/vulkan/anv_pipeline.c | 6 ++++++ src/intel/vulkan/anv_pipeline_cache.c | 3 +++ src/intel/vulkan/anv_query.c | 3 +++ src/intel/vulkan/anv_wsi.c | 6 ++++++ 9 files changed, 65 insertions(+), 1 deletion(-) diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 7ff7dba..44ae67d 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -318,6 +318,9 @@ void anv_FreeCommandBuffers( for (uint32_t i = 0; i < commandBufferCount; i++) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCommandBuffers[i]); + if (!cmd_buffer) + continue; + anv_cmd_buffer_destroy(cmd_buffer); } } @@ -796,6 +799,9 @@ void anv_DestroyCommandPool( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool); + if (!pool) + return; + list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer, &pool->cmd_buffers, pool_link) { anv_cmd_buffer_destroy(cmd_buffer); diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index 7d5a78d..17a1c8e 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -200,6 +200,9 @@ void anv_DestroyDescriptorSetLayout( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, _set_layout); + if (!set_layout) + return; + vk_free2(&device->alloc, pAllocator, set_layout); } @@ -282,6 +285,9 @@ void anv_DestroyPipelineLayout( ANV_FROM_HANDLE(anv_device, device, _device); 
ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, _pipelineLayout); + if (!pipeline_layout) + return; + vk_free2(&device->alloc, pAllocator, pipeline_layout); } @@ -355,6 +361,9 @@ void anv_DestroyDescriptorPool( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_descriptor_pool, pool, _pool); + if (!pool) + return; + anv_state_stream_finish(&pool->surface_state_stream); vk_free2(&device->alloc, pAllocator, pool); } @@ -546,6 +555,9 @@ VkResult anv_FreeDescriptorSets( for (uint32_t i = 0; i < count; i++) { ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); + if (!set) + continue; + anv_descriptor_set_destroy(device, pool, set); } diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index a9aa646..424fc52 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1544,6 +1544,9 @@ void anv_DestroyFence( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_fence, fence, _fence); + if (!fence) + return; + assert(fence->bo.map == fence); anv_bo_pool_free(&device->batch_bo_pool, &fence->bo); } @@ -1783,6 +1786,9 @@ void anv_DestroyEvent( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_event, event, _event); + if (!event) + return; + anv_state_pool_free(&device->dynamic_state_pool, event->state); } @@ -1875,6 +1881,9 @@ void anv_DestroyBuffer( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + if (!buffer) + return; + vk_free2(&device->alloc, pAllocator, buffer); } @@ -1902,6 +1911,9 @@ void anv_DestroySampler( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_sampler, sampler, _sampler); + if (!sampler) + return; + vk_free2(&device->alloc, pAllocator, sampler); } @@ -1946,5 +1958,8 @@ void anv_DestroyFramebuffer( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_framebuffer, fb, _fb); + if (!fb) + return; + vk_free2(&device->alloc, pAllocator, fb); } diff --git a/src/intel/vulkan/anv_image.c 
b/src/intel/vulkan/anv_image.c index b7c2e99..4a4d87e 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -275,8 +275,12 @@ anv_DestroyImage(VkDevice _device, VkImage _image, const VkAllocationCallbacks *pAllocator) { ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_image, image, _image); + + if (!image) + return; - vk_free2(&device->alloc, pAllocator, anv_image_from_handle(_image)); + vk_free2(&device->alloc, pAllocator, image); } VkResult anv_BindImageMemory( @@ -565,6 +569,9 @@ anv_DestroyImageView(VkDevice _device, VkImageView _iview, ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_image_view, iview, _iview); + if (!iview) + return; + if (iview->color_rt_surface_state.alloc_size > 0) { anv_state_pool_free(&device->surface_state_pool, iview->color_rt_surface_state); @@ -655,6 +662,9 @@ anv_DestroyBufferView(VkDevice _device, VkBufferView bufferView, ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_buffer_view, view, bufferView); + if (!view) + return; + if (view->surface_state.alloc_size > 0) anv_state_pool_free(&device->surface_state_pool, view->surface_state); diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c index 6eaa5c8..1f35a42 100644 --- a/src/intel/vulkan/anv_pass.c +++ b/src/intel/vulkan/anv_pass.c @@ -146,6 +146,9 @@ void anv_DestroyRenderPass( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_render_pass, pass, _pass); + if (!pass) + return; + vk_free2(&device->alloc, pAllocator, pass->subpass_attachments); vk_free2(&device->alloc, pAllocator, pass); } diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 4b8020a..e543c98 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -75,6 +75,9 @@ void anv_DestroyShaderModule( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_shader_module, module, _module); + if (!module) + return; + vk_free2(&device->alloc, 
pAllocator, module); } @@ -189,6 +192,9 @@ void anv_DestroyPipeline( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); + if (!pipeline) + return; + anv_reloc_list_finish(&pipeline->batch_relocs, pAllocator ? pAllocator : &device->alloc); if (pipeline->blend_state.map) diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index ff6e651..ddd51db 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -454,6 +454,9 @@ void anv_DestroyPipelineCache( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); + if (!cache) + return; + anv_pipeline_cache_finish(cache); vk_free2(&device->alloc, pAllocator, cache); diff --git a/src/intel/vulkan/anv_query.c b/src/intel/vulkan/anv_query.c index 4afdaaf..293257b 100644 --- a/src/intel/vulkan/anv_query.c +++ b/src/intel/vulkan/anv_query.c @@ -87,6 +87,9 @@ void anv_DestroyQueryPool( ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_query_pool, pool, _pool); + if (!pool) + return; + anv_gem_munmap(pool->bo.map, pool->bo.size); anv_gem_close(device, pool->bo.gem_handle); vk_free2(&device->alloc, pAllocator, pool); diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c index b95e965..c504658 100644 --- a/src/intel/vulkan/anv_wsi.c +++ b/src/intel/vulkan/anv_wsi.c @@ -76,6 +76,9 @@ void anv_DestroySurfaceKHR( ANV_FROM_HANDLE(anv_instance, instance, _instance); ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface); + if (!surface) + return; + vk_free2(&instance->alloc, pAllocator, surface); } @@ -294,6 +297,9 @@ void anv_DestroySwapchainKHR( ANV_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain); const VkAllocationCallbacks *alloc; + if (!swapchain) + return; + if (pAllocator) alloc = pAllocator; else -- cgit v1.1 From 4c21d20dcf6df4eb83a403e9796f5196a8512377 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 10 Nov 2016 21:46:13 -0800 
Subject: anv/fence: Handle ANV_FENCE_CREATE_SIGNALED_BIT Cc: "13.0" Reviewed-by: Dave Airlie (cherry picked from commit 1c97432ce88ea272ff7d906cd36f70e09dafcab9) --- src/intel/vulkan/anv_device.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 424fc52..3f0e32a 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1529,7 +1529,11 @@ VkResult anv_CreateFence( fence->execbuf.rsvd1 = device->context_id; fence->execbuf.rsvd2 = 0; - fence->state = ANV_FENCE_STATE_RESET; + if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) { + fence->state = ANV_FENCE_STATE_SIGNALED; + } else { + fence->state = ANV_FENCE_STATE_RESET; + } *pFence = anv_fence_to_handle(fence); -- cgit v1.1 From 90bf0cb3132304be406efb153a9dcdc245335a26 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 10 Nov 2016 22:31:32 -0800 Subject: nir/spirv: Fix handling of gl_PrimitiveId Before, we were always treating it as an output, which is bogus. The only stage in which it can be an output is the geometry stage. In all other stages, it's an input which, in the back-end, we actually want to be a system value. 
Cc: "13.0" Reviewed-by: Dave Airlie (cherry picked from commit 955714759257e81f01f013c84d2bd7f14a0ec04f) --- src/compiler/spirv/vtn_variables.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c index 634058c..b66ceb2 100644 --- a/src/compiler/spirv/vtn_variables.c +++ b/src/compiler/spirv/vtn_variables.c @@ -805,8 +805,12 @@ vtn_get_builtin_location(struct vtn_builder *b, set_mode_system_value(mode); break; case SpvBuiltInPrimitiveId: - *location = VARYING_SLOT_PRIMITIVE_ID; - *mode = nir_var_shader_out; + if (*mode == nir_var_shader_out) { + *location = VARYING_SLOT_PRIMITIVE_ID; + } else { + *location = SYSTEM_VALUE_PRIMITIVE_ID; + set_mode_system_value(mode); + } break; case SpvBuiltInInvocationId: *location = SYSTEM_VALUE_INVOCATION_ID; -- cgit v1.1 From c94c804c2996f6c5111cdd3679650f29d630616a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 27 Oct 2016 22:42:02 -0700 Subject: anv/blorp: Ignore clears for attachments first used as resolve destinations Otherwise, we'll try to clear it the first time it's used as a draw so if you do some multisampled rendering, resolve to an attachment, and then draw on top of the single-sampled attachment, we might accidentally clear it. Cc: "13.0" (cherry picked from commit ccdf9af39265ef3478fac4d13b19e9d17fbbcab7) --- src/intel/vulkan/anv_blorp.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 73882445..d417469 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -1141,15 +1141,6 @@ anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer) struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; struct anv_subpass *subpass = cmd_buffer->state.subpass; - /* FINISHME(perf): Skip clears for resolve attachments. 
- * - * From the Vulkan 1.0 spec: - * - * If the first use of an attachment in a render pass is as a resolve - * attachment, then the loadOp is effectively ignored as the resolve is - * guaranteed to overwrite all pixels in the render area. - */ - if (!subpass->has_resolve) return; @@ -1163,6 +1154,17 @@ anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer) if (dst_att == VK_ATTACHMENT_UNUSED) continue; + if (cmd_buffer->state.attachments[dst_att].pending_clear_aspects) { + /* From the Vulkan 1.0 spec: + * + * If the first use of an attachment in a render pass is as a + * resolve attachment, then the loadOp is effectively ignored + * as the resolve is guaranteed to overwrite all pixels in the + * render area. + */ + cmd_buffer->state.attachments[dst_att].pending_clear_aspects = 0; + } + struct anv_image_view *src_iview = fb->attachments[src_att]; struct anv_image_view *dst_iview = fb->attachments[dst_att]; -- cgit v1.1 From 747052ee188fc17ee282ee26311187f131a7adfe Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 16 Nov 2016 20:24:25 -0800 Subject: i965: Fix compute shader crash. Fixes crashes when starting Deus Ex: Mankind Divided. 
Cc: mesa-stable@lists.freedesktop.org Signed-off-by: Kenneth Graunke Reviewed-by: Anuj Phogat (cherry picked from commit ca76e6b5213c92432b9f3a641cb26f5861d53e09) --- src/mesa/drivers/dri/i965/brw_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c index e7dcf47..c4493d4 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.c +++ b/src/mesa/drivers/dri/i965/brw_cs.c @@ -231,7 +231,7 @@ brw_upload_cs_prog(struct brw_context *brw) &brw->cs.base.prog_data)) { bool success = brw_codegen_cs_prog(brw, - ctx->Shader.CurrentProgram[MESA_SHADER_COMPUTE], + ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE], cp, &key); (void) success; assert(success); -- cgit v1.1 From 1809f17bda56d4f9d6385f63a9c4a5df890e3cad Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 15 Nov 2016 11:53:33 -0800 Subject: mesa: Drop PATH_MAX usage. GNU/Hurd does not define PATH_MAX since it doesn't have such arbitrary limitation, so this failed to compile. Apparently glibc does not enforce PATH_MAX restrictions anyway, so it's kind of a hoax: https://www.gnu.org/software/libc/manual/html_node/Limits-for-Files.html MSVC uses a different name (_MAX_PATH) as well, which is annoying. We don't really need it. We can simply asprintf() the filenames. If the filename exceeds an OS path limit, presumably fopen() will fail, and we already check that. (We actually use ralloc_asprintf because Mesa provides that everywhere, and it doesn't look like we've provided an implementation of GNU's asprintf() for all platforms.) Fixes the build on GNU/Hurd. 
Cc: "13.0" Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98632 Signed-off-by: Samuel Thibault Signed-off-by: Kenneth Graunke Reviewed-by: Emil Velikov (cherry picked from commit 9bfee7047b70cb0aa026ca9536465762f96cb2b1) [Emil Velikov: s|prog->Id|base->Id|] Signed-off-by: Emil Velikov Conflicts: src/mesa/main/arbprogram.c --- src/mesa/main/arbprogram.c | 12 ++++-------- src/mesa/main/shaderapi.c | 37 +++++++++++-------------------------- 2 files changed, 15 insertions(+), 34 deletions(-) diff --git a/src/mesa/main/arbprogram.c b/src/mesa/main/arbprogram.c index 911b6fa..53bd5e3 100644 --- a/src/mesa/main/arbprogram.c +++ b/src/mesa/main/arbprogram.c @@ -41,11 +41,6 @@ #include "program/program.h" #include "program/prog_print.h" -#ifdef _MSC_VER -#include -#define PATH_MAX _MAX_PATH -#endif - /** * Bind a program (make it current) * \note Called from the GL API dispatcher by both glBindProgramNV @@ -388,12 +383,12 @@ _mesa_ProgramStringARB(GLenum target, GLenum format, GLsizei len, const char *capture_path = _mesa_get_shader_capture_path(); if (capture_path != NULL) { FILE *file; - char filename[PATH_MAX]; const char *shader_type = target == GL_FRAGMENT_PROGRAM_ARB ? 
"fragment" : "vertex"; + char *filename = + ralloc_asprintf(NULL, "%s/%cp-%u.shader_test", + capture_path, shader_type[0], base->Id); - _mesa_snprintf(filename, sizeof(filename), "%s/%cp-%u.shader_test", - capture_path, shader_type[0], base->Id); file = fopen(filename, "w"); if (file) { fprintf(file, @@ -403,6 +398,7 @@ _mesa_ProgramStringARB(GLenum target, GLenum format, GLsizei len, } else { _mesa_warning(ctx, "Failed to open %s", filename); } + ralloc_free(filename); } } diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index c40bb2d..2ed47f0 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -60,11 +60,6 @@ #include "util/hash_table.h" #include "util/mesa-sha1.h" -#ifdef _MSC_VER -#include -#define PATH_MAX _MAX_PATH -#endif - /** * Return mask of GLSL_x flags by examining the MESA_GLSL env var. */ @@ -112,13 +107,6 @@ _mesa_get_shader_capture_path(void) if (!read_env_var) { path = getenv("MESA_SHADER_CAPTURE_PATH"); read_env_var = true; - if (path && - strlen(path) > PATH_MAX - strlen("/fp-4294967295.shader_test")) { - GET_CURRENT_CONTEXT(ctx); - _mesa_warning(ctx, "MESA_SHADER_CAPTURE_PATH too long; ignoring " - "request to capture shaders"); - path = NULL; - } } return path; @@ -1101,11 +1089,8 @@ _mesa_link_program(struct gl_context *ctx, struct gl_shader_program *shProg) const char *capture_path = _mesa_get_shader_capture_path(); if (shProg->Name != 0 && shProg->Name != ~0 && capture_path != NULL) { FILE *file; - char filename[PATH_MAX]; - - _mesa_snprintf(filename, sizeof(filename), "%s/%u.shader_test", - capture_path, shProg->Name); - + char *filename = ralloc_asprintf(NULL, "%s/%u.shader_test", + capture_path, shProg->Name); file = fopen(filename, "w"); if (file) { fprintf(file, "[require]\nGLSL%s >= %u.%02u\n", @@ -1124,6 +1109,8 @@ _mesa_link_program(struct gl_context *ctx, struct gl_shader_program *shProg) } else { _mesa_warning(ctx, "Failed to open %s", filename); } + + ralloc_free(filename); } if 
(shProg->LinkStatus == GL_FALSE && @@ -1618,9 +1605,9 @@ generate_sha1(const char *source, char sha_str[64]) * * /_.glsl */ -static void +static char * construct_name(const gl_shader_stage stage, const char *source, - const char *path, char *name, unsigned length) + const char *path) { char sha[64]; static const char *types[] = { @@ -1628,8 +1615,7 @@ construct_name(const gl_shader_stage stage, const char *source, }; generate_sha1(source, sha); - _mesa_snprintf(name, length, "%s/%s_%s.glsl", path, types[stage], - sha); + return ralloc_asprintf(NULL, "%s/%s_%s.glsl", path, types[stage], sha); } /** @@ -1638,7 +1624,6 @@ construct_name(const gl_shader_stage stage, const char *source, static void dump_shader(const gl_shader_stage stage, const char *source) { - char name[PATH_MAX]; static bool path_exists = true; char *dump_path; FILE *f; @@ -1652,7 +1637,7 @@ dump_shader(const gl_shader_stage stage, const char *source) return; } - construct_name(stage, source, dump_path, name, PATH_MAX); + char *name = construct_name(stage, source, dump_path); f = fopen(name, "w"); if (f) { @@ -1663,6 +1648,7 @@ dump_shader(const gl_shader_stage stage, const char *source) _mesa_warning(ctx, "could not open %s for dumping shader (%s)", name, strerror(errno)); } + ralloc_free(name); } /** @@ -1672,7 +1658,6 @@ dump_shader(const gl_shader_stage stage, const char *source) static GLcharARB * read_shader(const gl_shader_stage stage, const char *source) { - char name[PATH_MAX]; char *read_path; static bool path_exists = true; int len, shader_size = 0; @@ -1688,9 +1673,9 @@ read_shader(const gl_shader_stage stage, const char *source) return NULL; } - construct_name(stage, source, read_path, name, PATH_MAX); - + char *name = construct_name(stage, source, read_path); f = fopen(name, "r"); + ralloc_free(name); if (!f) return NULL; -- cgit v1.1 From 8691daef62d3a40014757426c3f25960095b8d3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tapani=20P=C3=A4lli?= Date: Wed, 17 Aug 2016 10:37:45 +0300 
Subject: mesa: fix empty program log length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case we have empty log (""), we should return 0. This fixes Khronos WebGL conformance test 'program-infolog'. From OpenGL ES 3.1 (and OpenGL 4.5 Core) spec: "If pname is INFO_LOG_LENGTH , the length of the info log, including a null terminator, is returned. If there is no info log, zero is returned." v2: apply same fix for get_shaderiv and _mesa_GetProgramPipelineiv (Ian) Signed-off-by: Tapani Pälli Reviewed-by: Iago Toral Quiroga (v1) Reviewed-by: Ian Romanick Reviewed-by: Nicolai Hähnle Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97321 Cc: "13.0" (cherry picked from commit ec4e71f75e9b8a1c427994efa32a61593e3172f9) --- src/mesa/main/pipelineobj.c | 3 ++- src/mesa/main/shaderapi.c | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c index 8229840..310b745 100644 --- a/src/mesa/main/pipelineobj.c +++ b/src/mesa/main/pipelineobj.c @@ -645,7 +645,8 @@ _mesa_GetProgramPipelineiv(GLuint pipeline, GLenum pname, GLint *params) *params = pipe->ActiveProgram ? pipe->ActiveProgram->Name : 0; return; case GL_INFO_LOG_LENGTH: - *params = pipe->InfoLog ? strlen(pipe->InfoLog) + 1 : 0; + *params = (pipe->InfoLog && pipe->InfoLog[0] != '\0') ? + strlen(pipe->InfoLog) + 1 : 0; return; case GL_VALIDATE_STATUS: *params = pipe->Validated; diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 2ed47f0..15f324b 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -642,7 +642,8 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname, *params = shProg->Validated; return; case GL_INFO_LOG_LENGTH: - *params = shProg->InfoLog ? strlen(shProg->InfoLog) + 1 : 0; + *params = (shProg->InfoLog && shProg->InfoLog[0] != '\0') ? 
+ strlen(shProg->InfoLog) + 1 : 0; return; case GL_ATTACHED_SHADERS: *params = shProg->NumShaders; @@ -890,7 +891,8 @@ get_shaderiv(struct gl_context *ctx, GLuint name, GLenum pname, GLint *params) *params = shader->CompileStatus; break; case GL_INFO_LOG_LENGTH: - *params = shader->InfoLog ? strlen(shader->InfoLog) + 1 : 0; + *params = (shader->InfoLog && shader->InfoLog[0] != '\0') ? + strlen(shader->InfoLog) + 1 : 0; break; case GL_SHADER_SOURCE_LENGTH: *params = shader->Source ? strlen((char *) shader->Source) + 1 : 0; -- cgit v1.1 From 3c9e8660e936cb8e2d4dd44066d038cda0e664ef Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 14 Nov 2016 15:59:57 -0800 Subject: i965: Fix GS push inputs with enhanced layouts. We weren't taking first_component into account when handling GS push inputs. We hardly ever push GS inputs, so this was not caught by existing tests. When I started using component qualifiers for the gl_ClipDistance arrays, glsl-1.50-transform-feedback-type-and-size started catching this. Cc: "13.0" Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand (cherry picked from commit c4be6e0b8d91746eccf334b9e20861af4036d06a) --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 4baadc9..e4102c6 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1984,7 +1984,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst, } else { for (unsigned i = 0; i < num_components; i++) { bld.MOV(offset(dst, bld, i), - fs_reg(ATTR, imm_offset + i, dst.type)); + fs_reg(ATTR, imm_offset + i + first_component, dst.type)); } } return; -- cgit v1.1 From 7e9bdb40f34243201317ca0a3b74809a619e0e53 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 16 Nov 2016 23:41:29 +0000 Subject: radv: make sure to flush input attachments correctly. 
This fixes 9 of the dEQP-VK.renderpass.attachment_allocation.input_output.* tests. Cc: "13.0" Reviewed-by: Edward O'Callaghan Signed-off-by: Dave Airlie (cherry picked from commit 51a44c0021398177d56f86b7fb8d63673186a380) --- src/amd/vulkan/radv_cmd_buffer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 690c739..9517e7a 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -2291,6 +2291,7 @@ void radv_CmdPipelineBarrier( break; case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT: case VK_ACCESS_TRANSFER_READ_BIT: + case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT: flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER | RADV_CMD_FLAG_INV_GLOBAL_L2; default: break; -- cgit v1.1 From 32adfd509df398696013704a39cde888361824bf Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 15 Nov 2016 06:46:50 +0000 Subject: radv: fix image view creation for depth and stencil only This fixes the image view for sampling just the depth. It removes some pointless swizzle code, and adds a missing case for the x8_d24 format. 
Fixes: dEQP-VK.renderpass.formats.d32_sfloat_s8_uint.input.* dEQP-VK.renderpass.formats.d24_unorm_s8_uint.input.* dEQP-VK.renderpass.formats.x8_d24_unorm_pack32.input.* Cc: "13.0" Signed-off-by: Dave Airlie (cherry picked from commit 6d7be52d90cd5f4798b9612e8a68f6d6d9e31c33) --- src/amd/vulkan/radv_formats.c | 1 + src/amd/vulkan/radv_image.c | 21 ++++++++------------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c index 76d5fa1..fe786b3 100644 --- a/src/amd/vulkan/radv_formats.c +++ b/src/amd/vulkan/radv_formats.c @@ -154,6 +154,7 @@ uint32_t radv_translate_tex_dataformat(VkFormat format, case VK_FORMAT_D16_UNORM: return V_008F14_IMG_DATA_FORMAT_16; case VK_FORMAT_D24_UNORM_S8_UINT: + case VK_FORMAT_X8_D24_UNORM_PACK32: return V_008F14_IMG_DATA_FORMAT_8_24; case VK_FORMAT_S8_UINT: return V_008F14_IMG_DATA_FORMAT_8; diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index 710eda1..3099d83 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -267,17 +267,7 @@ si_make_texture_descriptor(struct radv_device *device, if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) { const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; - const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; - - switch (vk_format) { - case VK_FORMAT_X8_D24_UNORM_PACK32: - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - vk_format_compose_swizzles(mapping, swizzle_yyyy, swizzle); - break; - default: - vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle); - } + vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle); } else { vk_format_compose_swizzles(mapping, desc->swizzle, swizzle); } @@ -775,8 +765,13 @@ radv_image_view_init(struct radv_image_view *iview, iview->vk_format = pCreateInfo->format; iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask; - if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) + if (iview->aspect_mask == 
VK_IMAGE_ASPECT_STENCIL_BIT) { is_stencil = true; + iview->vk_format = vk_format_stencil_only(iview->vk_format); + } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) { + iview->vk_format = vk_format_depth_only(iview->vk_format); + } + iview->extent = (VkExtent3D) { .width = radv_minify(image->extent.width , range->baseMipLevel), .height = radv_minify(image->extent.height, range->baseMipLevel), @@ -794,7 +789,7 @@ radv_image_view_init(struct radv_image_view *iview, si_make_texture_descriptor(device, image, false, iview->type, - pCreateInfo->format, + iview->vk_format, &pCreateInfo->components, 0, radv_get_levelCount(image, range) - 1, range->baseArrayLayer, -- cgit v1.1 From 6a3b5f32c23d3c901598ba8ec626086df9c79203 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 18 Nov 2016 03:58:30 +0000 Subject: radv: spir-v allows texture size query with and without lod. The translation to llvm was failing here due to required lod. This fixes some new SteamVR shaders. Cc: "13.0" Reviewed-by: Edward O'Callaghan Signed-off-by: Dave Airlie (cherry picked from commit b1340fd708bb873617b8a529ac45cbc9507bd6c4) --- src/amd/common/ac_nir_to_llvm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 0b73e06..31d7b6e 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -3416,7 +3416,10 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) address[count++] = sample_index; } else if(instr->op == nir_texop_txs) { count = 0; - address[count++] = lod; + if (lod) + address[count++] = lod; + else + address[count++] = ctx->i32zero; } for (chan = 0; chan < count; chan++) { -- cgit v1.1 From 9581776d53c963d7147666b4027dbdb55c9de95f Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 19 Nov 2016 14:05:06 -0800 Subject: anv: Implement a depth stall restriction on gen7 Fixes around 60 Vulkan CTS tests on Haswell Reviewed-by: Jordan 
Justen Cc: "13.0" (cherry picked from commit a8b85f1f772ef45cdeba9d5d205d105e689c3bdf) --- src/intel/vulkan/anv_genX.h | 2 ++ src/intel/vulkan/genX_blorp_exec.c | 2 ++ src/intel/vulkan/genX_cmd_buffer.c | 31 +++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+) diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index d4ed325..9f843b3 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -42,6 +42,8 @@ void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) void genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer); +void genX(cmd_buffer_emit_gen7_depth_flush)(struct anv_cmd_buffer *cmd_buffer); + void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer); void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer); diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c index 185aff6..cd9780d 100644 --- a/src/intel/vulkan/genX_blorp_exec.c +++ b/src/intel/vulkan/genX_blorp_exec.c @@ -164,6 +164,8 @@ genX(blorp_exec)(struct blorp_batch *batch, genX(flush_pipeline_select_3d)(cmd_buffer); + genX(cmd_buffer_emit_gen7_depth_flush)(cmd_buffer); + blorp_exec(batch, params); cmd_buffer->state.vb_dirty = ~0; diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 2bc7e74..0645997 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -1661,6 +1661,35 @@ genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer) } } +void +genX(cmd_buffer_emit_gen7_depth_flush)(struct anv_cmd_buffer *cmd_buffer) +{ + if (GEN_GEN >= 8) + return; + + /* From the Haswell PRM, documentation for 3DSTATE_DEPTH_BUFFER: + * + * "Restriction: Prior to changing Depth/Stencil Buffer state (i.e., any + * combination of 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, + * 3DSTATE_STENCIL_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER) SW must first + * issue a pipelined depth stall 
(PIPE_CONTROL with Depth Stall bit + * set), followed by a pipelined depth cache flush (PIPE_CONTROL with + * Depth Flush Bit set, followed by another pipelined depth stall + * (PIPE_CONTROL with Depth Stall Bit set), unless SW can otherwise + * guarantee that the pipeline from WM onwards is already flushed (e.g., + * via a preceding MI_FLUSH)." + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) { + pipe.DepthStallEnable = true; + } + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) { + pipe.DepthCacheFlushEnable = true; + } + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) { + pipe.DepthStallEnable = true; + } +} + static void cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) { @@ -1677,6 +1706,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) /* FIXME: Implement the PMA stall W/A */ /* FIXME: Width and Height are wrong */ + genX(cmd_buffer_emit_gen7_depth_flush)(cmd_buffer); + /* Emit 3DSTATE_DEPTH_BUFFER */ if (has_depth) { anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), db) { -- cgit v1.1 From 3d5b40fa763acb7c1564f9b4d0785f582210927b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Fri, 18 Nov 2016 15:18:10 +0100 Subject: radeonsi: store group_size_variable in struct si_compute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For compute shaders, we free the selector after the shader has been compiled, so we need to save this bit somewhere else. Also, make sure that this type of bug cannot re-appear, by NULL-ing the selector pointer after we're done with it. This bug has been there since the feature was added, but was only exposed in piglit arb_compute_variable_group_size-local-size by commit 9bfee7047b70cb0aa026ca9536465762f96cb2b1 (which is totally unrelated). 
Cc: 13.0 Reviewed-by: Marek Olšák (cherry picked from commit 42d5e91a2ae235c007c5d17935be9bb1c4ff388e) --- src/gallium/drivers/radeonsi/si_compute.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index e785106..a35187c 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -42,7 +42,8 @@ struct si_compute { struct si_shader shader; struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS]; - bool use_code_object_v2; + unsigned use_code_object_v2 : 1; + unsigned variable_group_size : 1; }; struct dispatch_packet { @@ -147,7 +148,11 @@ static void *si_create_compute_state( S_00B84C_TGID_Z_EN(1) | S_00B84C_TIDIG_COMP_CNT(2) | S_00B84C_LDS_SIZE(shader->config.lds_size); + program->variable_group_size = + sel.info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0; + FREE(sel.tokens); + program->shader.selector = NULL; } else { const struct pipe_llvm_program_header *header; const char *code; @@ -607,14 +612,12 @@ static void si_setup_tgsi_grid(struct si_context *sctx, } } else { struct si_compute *program = sctx->cs_shader_state.program; - bool variable_group_size = - program->shader.selector->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0; - radeon_set_sh_reg_seq(cs, grid_size_reg, variable_group_size ? 6 : 3); + radeon_set_sh_reg_seq(cs, grid_size_reg, program->variable_group_size ? 
6 : 3); radeon_emit(cs, info->grid[0]); radeon_emit(cs, info->grid[1]); radeon_emit(cs, info->grid[2]); - if (variable_group_size) { + if (program->variable_group_size) { radeon_emit(cs, info->block[0]); radeon_emit(cs, info->block[1]); radeon_emit(cs, info->block[2]); -- cgit v1.1 From 63e2bb2f36f4d51bc6223ed22da6aac1766868a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Thu, 17 Nov 2016 21:55:38 +0100 Subject: glsl/lower_output_reads: fix geometry shader output handling with conditional emit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consider a geometry shader that contains code like this: some_out = expr; if (cond) { ... EmitVertex(); } else { ... EmitVertex(); } Both branches should see the correct value of some_out. Since this is a rather subtle and rare case, I'm submitting a piglit test for this as well. GLSL says that the values of output variables are undefined after EmitVertex(). With this change, the values will now be defined and unmodified. This may reduce optimization opportunities in the probably quite rare case where subsequent compiler passes cannot prove that the value of the output variable is overwritten. 
Cc: 13.0 Reviewed-by: Edward O'Callaghan Reviewed-by: Marek Olšák (cherry picked from commit 0d383a79a8f13bb00ed5e5d84f41071b43c7e92d) --- src/compiler/glsl/lower_output_reads.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/compiler/glsl/lower_output_reads.cpp b/src/compiler/glsl/lower_output_reads.cpp index 732f4d3..8a375ac 100644 --- a/src/compiler/glsl/lower_output_reads.cpp +++ b/src/compiler/glsl/lower_output_reads.cpp @@ -157,7 +157,6 @@ ir_visitor_status output_read_remover::visit_leave(ir_emit_vertex *ir) { hash_table_call_foreach(replacements, emit_return_copy, ir); - _mesa_hash_table_clear(replacements, NULL); return visit_continue; } -- cgit v1.1 From d6964bbf54a526d4a28ff88c61349eb24a64af6f Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Sat, 19 Nov 2016 14:52:29 -0800 Subject: i965/hsw: Set integer mode in sampling state for stencil texturing Fixes: ES31-CTS.functional.texture.border_clamp.formats.depth24_stencil8_sample_stencil.nearest_size_pot ES31-CTS.functional.texture.border_clamp.formats.depth24_stencil8_sample_stencil.nearest_size_npot ES31-CTS.functional.texture.border_clamp.formats.depth32f_stencil8_sample_stencil.nearest_size_pot ES31-CTS.functional.texture.border_clamp.formats.depth32f_stencil8_sample_stencil.nearest_size_npot ES31-CTS.functional.texture.border_clamp.unused_channels.depth24_stencil8_sample_stencil ES31-CTS.functional.texture.border_clamp.unused_channels.depth32f_stencil8_sample_stencil Cc: "13.0" Signed-off-by: Jordan Justen Reviewed-by: Jason Ekstrand (cherry picked from commit 44c5ed02d1b173c061c3188e245d384fd4c0abba) --- src/mesa/drivers/dri/i965/brw_sampler_state.c | 18 +++++++++--------- src/mesa/drivers/dri/i965/brw_state.h | 9 --------- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_sampler_state.c b/src/mesa/drivers/dri/i965/brw_sampler_state.c index 0eed8f9..b649072 100644 --- a/src/mesa/drivers/dri/i965/brw_sampler_state.c +++ 
b/src/mesa/drivers/dri/i965/brw_sampler_state.c @@ -213,7 +213,7 @@ static void upload_default_color(struct brw_context *brw, const struct gl_sampler_object *sampler, mesa_format format, GLenum base_format, - bool is_integer_format, + bool is_integer_format, bool is_stencil_sampling, uint32_t *sdc_offset) { union gl_color_union color; @@ -277,7 +277,7 @@ upload_default_color(struct brw_context *brw, uint32_t *sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR, 4 * 4, 64, sdc_offset); memcpy(sdc, color.ui, 4 * 4); - } else if (brw->is_haswell && is_integer_format) { + } else if (brw->is_haswell && (is_integer_format || is_stencil_sampling)) { /* Haswell's integer border color support is completely insane: * SAMPLER_BORDER_COLOR_STATE is 20 DWords. The first four are * for float colors. The next 12 DWords are MBZ and only exist to @@ -291,10 +291,9 @@ upload_default_color(struct brw_context *brw, memset(sdc, 0, 20 * 4); sdc = &sdc[16]; + bool stencil = format == MESA_FORMAT_S_UINT8 || is_stencil_sampling; const int bits_per_channel = - _mesa_get_format_bits(format, - format == MESA_FORMAT_S_UINT8 ? - GL_STENCIL_BITS : GL_RED_BITS); + _mesa_get_format_bits(format, stencil ? GL_STENCIL_BITS : GL_RED_BITS); /* From the Haswell PRM, "Command Reference: Structures", Page 36: * "If any color channel is missing from the surface format, @@ -389,12 +388,13 @@ upload_default_color(struct brw_context *brw, * Sets the sampler state for a single unit based off of the sampler key * entry. 
*/ -void +static void brw_update_sampler_state(struct brw_context *brw, GLenum target, bool tex_cube_map_seamless, GLfloat tex_unit_lod_bias, mesa_format format, GLenum base_format, bool is_integer_format, + bool is_stencil_sampling, const struct gl_sampler_object *sampler, uint32_t *sampler_state, uint32_t batch_offset_for_sampler_state) @@ -516,8 +516,8 @@ brw_update_sampler_state(struct brw_context *brw, if (wrap_mode_needs_border_color(wrap_s) || wrap_mode_needs_border_color(wrap_t) || wrap_mode_needs_border_color(wrap_r)) { - upload_default_color(brw, sampler, - format, base_format, is_integer_format, + upload_default_color(brw, sampler, format, base_format, + is_integer_format, is_stencil_sampling, &border_color_offset); } @@ -555,7 +555,7 @@ update_sampler_state(struct brw_context *brw, brw_update_sampler_state(brw, texObj->Target, ctx->Texture.CubeMapSeamless, texUnit->LodBias, firstImage->TexFormat, firstImage->_BaseFormat, - texObj->_IsIntegerFormat, + texObj->_IsIntegerFormat, texObj->StencilSampling, sampler, sampler_state, batch_offset_for_sampler_state); } diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index b42b9af..b8aa97b 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -337,15 +337,6 @@ void brw_emit_sampler_state(struct brw_context *brw, bool non_normalized_coordinates, uint32_t border_color_offset); -void brw_update_sampler_state(struct brw_context *brw, - GLenum target, bool tex_cube_map_seamless, - GLfloat tex_unit_lod_bias, - mesa_format format, GLenum base_format, - bool is_integer_format, - const struct gl_sampler_object *sampler, - uint32_t *sampler_state, - uint32_t batch_offset_for_sampler_state); - /* gen6_wm_state.c */ void gen6_upload_wm_state(struct brw_context *brw, -- cgit v1.1 From c19a3311398bdb6a2fd408f017f16a73ad8a801c Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Tue, 22 Nov 2016 00:21:23 +0900 Subject: util/disk_cache: close a 
previously opened handle in disk_cache_put (v2) We're missing the close() to the matching open(). CID 1373407 v2: Fixes from Emil Velikov's review Update the teardown in reverse order of the setup/init. Cc: "13.0" Signed-off-by: Mun Gwan-gyeong Reviewed-by: Emil Velikov Reviewed-by: Iago Toral Quiroga (v1) (cherry picked from commit 69cc7d90f9f60d95cd570a4e87755a474554d41f) --- src/compiler/glsl/cache.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/compiler/glsl/cache.c b/src/compiler/glsl/cache.c index 64a34f0..db934e5 100644 --- a/src/compiler/glsl/cache.c +++ b/src/compiler/glsl/cache.c @@ -612,19 +612,18 @@ cache_put(struct program_cache *cache, p_atomic_add(cache->size, size); + done: + if (fd_final != -1) + close(fd_final); /* This close finally releases the flock, (now that the final dile * has been renamed into place and the size has been added). */ - close(fd); - fd = -1; - - done: + if (fd != -1) + close(fd); if (filename_tmp) ralloc_free(filename_tmp); if (filename) ralloc_free(filename); - if (fd != -1) - close(fd); } void * -- cgit v1.1 From a39e535d6c508635d0f9a328ed91a54ce00a6476 Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Sun, 20 Nov 2016 20:44:22 +0900 Subject: anv: Fix unintentional integer overflow in anv_CreateDmaBufImageINTEL Since both pCreateInfo->strideInBytes and pCreateInfo->extent.height are of uint32_t type, 32-bit arithmetic will be used. Fix unintentional integer overflow by casting to uint64_t before multiplying.
CID 1394321 Cc: "13.0" Signed-off-by: Mun Gwan-gyeong [Emil Velikov: cast only one of the arguments] Reviewed-by: Emil Velikov (cherry picked from commit e074a08a6ded3260f13111d0e23961dea2da2442) --- src/intel/vulkan/anv_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/intel/vulkan/anv_intel.c b/src/intel/vulkan/anv_intel.c index 1c50e2b..c356e84 100644 --- a/src/intel/vulkan/anv_intel.c +++ b/src/intel/vulkan/anv_intel.c @@ -55,7 +55,7 @@ VkResult anv_CreateDmaBufImageINTEL( goto fail; } - uint64_t size = pCreateInfo->strideInBytes * pCreateInfo->extent.height; + uint64_t size = (uint64_t)pCreateInfo->strideInBytes * pCreateInfo->extent.height; anv_bo_init(&mem->bo, gem_handle, size); -- cgit v1.1 From 6c87a21497330a4640156c2e3cc7536754ac3931 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 21 Nov 2016 20:17:24 -0800 Subject: anv/cmd_buffer: Handle running out of binding tables in compute shaders If we try to allocate a binding table and fail, we have to get a new binding table block, re-emit STATE_BASE_ADDRESS, and then try again. We already handle this correctly for 3D and blorp but it never got handled for CS. This fixes the new stress.lots-of-surface-state.cs.static crucible test.
Reviewed-by: Lionel Landwerlin Reviewed-by: Jordan Justen Cc: "13.0" (cherry picked from commit 722ab3de9f0e30e1dfbbd2b5217330b85f53bcec) --- src/intel/vulkan/genX_cmd_buffer.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 0645997..b844460 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -1343,12 +1343,22 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) struct anv_state surfaces = { 0, }, samplers = { 0, }; VkResult result; - result = emit_samplers(cmd_buffer, MESA_SHADER_COMPUTE, &samplers); - if (result != VK_SUCCESS) - return result; result = emit_binding_table(cmd_buffer, MESA_SHADER_COMPUTE, &surfaces); - if (result != VK_SUCCESS) - return result; + if (result != VK_SUCCESS) { + result = anv_cmd_buffer_new_binding_table_block(cmd_buffer); + assert(result == VK_SUCCESS); + + /* Re-emit state base addresses so we get the new surface state base + * address before we start emitting binding tables etc. 
+ */ + genX(cmd_buffer_emit_state_base_address)(cmd_buffer); + + result = emit_binding_table(cmd_buffer, MESA_SHADER_COMPUTE, &surfaces); + assert(result == VK_SUCCESS); + } + result = emit_samplers(cmd_buffer, MESA_SHADER_COMPUTE, &samplers); + assert(result == VK_SUCCESS); + struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); -- cgit v1.1 From f77b0972232af2d53241983f020449d5cdbdfa9a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 21 Nov 2016 20:22:53 -0800 Subject: anv/cmd_buffer: Emit a CS stall before setting a CS pipeline Reviewed-by: Lionel Landwerlin Reviewed-by: Jordan Justen Cc: "13.0" (cherry picked from commit f680a01ad4ed360b44cd1b9d3c447a95cedbccd5) --- src/intel/vulkan/genX_cmd_buffer.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index b844460..f1b5387 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -1418,8 +1418,20 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) genX(flush_pipeline_select_gpgpu)(cmd_buffer); - if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) + if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE) { + /* From the Sky Lake PRM Vol 2a, MEDIA_VFE_STATE: + * + * "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless + * the only bits that are changed are scoreboard related: Scoreboard + * Enable, Scoreboard Type, Scoreboard Mask, Scoreboard * Delta. For + * these scoreboard related states, a MEDIA_STATE_FLUSH is + * sufficient." 
+ */ + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT; + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + } if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) { -- cgit v1.1 From c10e1fb4408317c15886c0dadc80d7dce7ab23ff Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Tue, 22 Nov 2016 11:12:35 +0100 Subject: gbm: request correct version of the DRI2_FENCE extension There is no version 2 of the DRI2_FENCE extension. So only a request for version 1 has a chance to succeed. Fixes: 74b1969d717f (gbm: wire up fence extension) Cc: "13.0" Signed-off-by: Lucas Stach Reviewed-by: Emil Velikov (cherry picked from commit d9a3ad94cac34c51603660af6647290d238f64bc) --- src/gbm/backends/dri/gbm_dri.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c index 51c4117..88f4428 100644 --- a/src/gbm/backends/dri/gbm_dri.c +++ b/src/gbm/backends/dri/gbm_dri.c @@ -245,7 +245,7 @@ struct dri_extension_match { static struct dri_extension_match dri_core_extensions[] = { { __DRI2_FLUSH, 1, offsetof(struct gbm_dri_device, flush) }, { __DRI_IMAGE, 1, offsetof(struct gbm_dri_device, image) }, - { __DRI2_FENCE, 2, offsetof(struct gbm_dri_device, fence), 1 }, + { __DRI2_FENCE, 1, offsetof(struct gbm_dri_device, fence), 1 }, { NULL, 0, 0 } }; -- cgit v1.1 From e692630755825afdac2873b676e785da13ae01af Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Thu, 6 Oct 2016 14:12:27 +0100 Subject: anv: fix enumeration of properties Driver should enumerate only up-to min2(num_available, num_requested) properties and return VK_INCOMPLETE if the # of requested props is smaller than the ones available. Presently we assert out in such cases. Inspired by a similar fix for RADV. v2: Use MIN2 + typed_memcpy (Jason). 
Should fix: dEQP-VK.api.info.device.extensions Cc: "13.0" Signed-off-by: Emil Velikov Reviewed-by: Eric Engestrom (v1) Reviewed-by: Jason Ekstrand (cherry picked from commit 5cc07d854c85e30c5aa1ced626b4b6500f8cd74c) --- src/intel/vulkan/anv_device.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 3f0e32a..125df22 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1007,10 +1007,11 @@ VkResult anv_EnumerateInstanceExtensionProperties( return VK_SUCCESS; } - assert(*pPropertyCount >= ARRAY_SIZE(global_extensions)); + *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(global_extensions)); + typed_memcpy(pProperties, global_extensions, *pPropertyCount); - *pPropertyCount = ARRAY_SIZE(global_extensions); - memcpy(pProperties, global_extensions, sizeof(global_extensions)); + if (*pPropertyCount < ARRAY_SIZE(global_extensions)) + return VK_INCOMPLETE; return VK_SUCCESS; } @@ -1026,10 +1027,11 @@ VkResult anv_EnumerateDeviceExtensionProperties( return VK_SUCCESS; } - assert(*pPropertyCount >= ARRAY_SIZE(device_extensions)); + *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(device_extensions)); + typed_memcpy(pProperties, device_extensions, *pPropertyCount); - *pPropertyCount = ARRAY_SIZE(device_extensions); - memcpy(pProperties, device_extensions, sizeof(device_extensions)); + if (*pPropertyCount < ARRAY_SIZE(device_extensions)) + return VK_INCOMPLETE; return VK_SUCCESS; } -- cgit v1.1 From 9eea4ba5abe59f40bc89e681586e4d3b1fbda4c8 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Mon, 14 Nov 2016 12:36:57 +0100 Subject: anv/format: handle unsupported formats properly According to the spec for vkGetPhysicalDeviceImageFormatProperties: "If format is not a supported image format, or if the combination of format, type, tiling, usage, and flags is not supported for images, then vkGetPhysicalDeviceImageFormatProperties returns 
VK_ERROR_FORMAT_NOT_SUPPORTED." Makes the following Vulkan CTS tests report 'Not Supported' instead of crashing: dEQP-VK.api.image_clearing.clear_color_image.1d_b8g8r8_unorm dEQP-VK.api.image_clearing.clear_color_image.1d_b8g8r8_snorm dEQP-VK.api.image_clearing.clear_color_image.1d_b8g8r8_uscaled dEQP-VK.api.image_clearing.clear_color_image.1d_b8g8r8_sscaled dEQP-VK.api.image_clearing.clear_color_image.1d_b8g8r8_uint dEQP-VK.api.image_clearing.clear_color_image.1d_b8g8r8_sint dEQP-VK.api.image_clearing.clear_color_image.1d_b8g8r8_srgb dEQP-VK.api.image_clearing.clear_color_image.1d_b8g8r8a8_unorm dEQP-VK.api.image_clearing.clear_color_image.1d_b8g8r8a8_snorm dEQP-VK.api.image_clearing.clear_color_image.1d_b8g8r8a8_uscaled dEQP-VK.api.image_clearing.clear_color_image.1d_b8g8r8a8_sscaled dEQP-VK.api.image_clearing.clear_color_image.1d_b8g8r8a8_uint dEQP-VK.api.image_clearing.clear_color_image.1d_b8g8r8a8_sint dEQP-VK.api.image_clearing.clear_color_image.1d_b8g8r8a8_srgb dEQP-VK.api.image_clearing.clear_color_image.1d_r4g4_unorm_pack8 dEQP-VK.api.image_clearing.clear_color_image.1d_r8_srgb dEQP-VK.api.image_clearing.clear_color_image.1d_r8g8_srgb dEQP-VK.api.image_clearing.clear_color_image.1d_r8g8b8_srgb dEQP-VK.api.image_clearing.clear_color_image.1d_b5g5r5a1_unorm_pack16 Reviewed-by: Lionel Landwerlin (cherry picked from commit 35deeda66f5fcbccb24f8eda42c8a4f67bb106c9) Squashed with: anv/format: handle unsupported formats earlier Reviewed-by: Jason Ekstrand (cherry picked from commit 277f868e6682b9ee398ed326425274c3d1898417) --- src/intel/vulkan/anv_formats.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c index f691554..875a7d3 100644 --- a/src/intel/vulkan/anv_formats.c +++ b/src/intel/vulkan/anv_formats.c @@ -463,6 +463,9 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties( uint32_t maxArraySize; VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT; + if (anv_formats[format].isl_format == 
ISL_FORMAT_UNSUPPORTED) + goto unsupported; + anv_physical_device_get_format_properties(physical_device, format, &format_props); -- cgit v1.1 From 28c6c8d09e6bb468bfc53a57e12e579411c30941 Mon Sep 17 00:00:00 2001 From: Eduardo Lima Mitev Date: Tue, 25 Oct 2016 10:20:12 +0200 Subject: vulkan/wsi/x11: Fix behavior of vkGetPhysicalDeviceSurfaceFormatsKHR x11_surface_get_formats() is currently asserting that the number of elements in pSurfaceFormats must be greater than or equal to the number of formats available. This is buggy because pSurfaceFormatCount elements are later copied from the internal formats' array, so if pSurfaceFormatCount is greater, the copy will read past the end of that array. On top of that, this assertion violates the spec. From the Vulkan 1.0 (revision 32, with KHR extensions), page 579 of the PDF: "If pSurfaceFormats is NULL, then the number of format pairs supported for the given surface is returned in pSurfaceFormatCount. Otherwise, pSurfaceFormatCount must point to a variable set by the user to the number of elements in the pSurfaceFormats array, and on return the variable is overwritten with the number of structures actually written to pSurfaceFormats. If the value of pSurfaceFormatCount is less than the number of format pairs supported, at most pSurfaceFormatCount structures will be written. If pSurfaceFormatCount is smaller than the number of format pairs supported for the given surface, VK_INCOMPLETE will be returned instead of VK_SUCCESS to indicate that not all the available values were returned." So, the correct behavior is: if pSurfaceFormatCount is greater than the internal number of formats, it is clamped to that many formats. But if it is less than that, then pSurfaceFormatCount elements are copied, and the call returns VK_INCOMPLETE.
Reviewed-by: Dave Airlie (cherry picked from commit 750d8cad72a532d977df10ffbbdd1902bd06f50b) Nominated-by: Emil Velikov Squashed with commit: vulkan/wsi/x11: Smplify implementation of vkGetPhysicalDeviceSurfaceFormatsKHR This patch simplifies x11_surface_get_formats(). It is actually just a readability improvement over the patch I provided earlier this week (750d8cad72). Reviewed-by: Eric Engestrom (cherry picked from commit 129da274261b6e79f459e24428591f137bf92ed1) --- src/vulkan/wsi/wsi_common_x11.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/vulkan/wsi/wsi_common_x11.c b/src/vulkan/wsi/wsi_common_x11.c index 73bd03c..6b7e133 100644 --- a/src/vulkan/wsi/wsi_common_x11.c +++ b/src/vulkan/wsi/wsi_common_x11.c @@ -404,11 +404,11 @@ x11_surface_get_formats(VkIcdSurfaceBase *surface, return VK_SUCCESS; } - assert(*pSurfaceFormatCount >= ARRAY_SIZE(formats)); + *pSurfaceFormatCount = MIN2(*pSurfaceFormatCount, ARRAY_SIZE(formats)); typed_memcpy(pSurfaceFormats, formats, *pSurfaceFormatCount); - *pSurfaceFormatCount = ARRAY_SIZE(formats); - return VK_SUCCESS; + return *pSurfaceFormatCount < ARRAY_SIZE(formats) ? + VK_INCOMPLETE : VK_SUCCESS; } static VkResult -- cgit v1.1 From f7b58a378ca94cf1c2637d640ce5b9fb8f8519a6 Mon Sep 17 00:00:00 2001 From: Eduardo Lima Mitev Date: Fri, 28 Oct 2016 14:34:39 +0200 Subject: vulkan/wsi/x11: Fix behavior of vkGetPhysicalDeviceSurfacePresentModesKHR x11_surface_get_present_modes() is currently asserting that the number of elements in pPresentModeCount must be greater than or equal to the number of present modes available. This is buggy because pPresentModeCount elements are later copied from the internal modes' array, so if pPresentModeCount is greater, it will overflow it. On top of that, this assertion violates the spec. 
From the Vulkan 1.0 (revision 32, with KHR extensions), page 581 of the PDF: "If the value of pPresentModeCount is less than the number of presentation modes supported, at most pPresentModeCount values will be written. If pPresentModeCount is smaller than the number of presentation modes supported for the given surface, VK_INCOMPLETE will be returned instead of VK_SUCCESS to indicate that not all the available values were returned." So, the correct behavior is: if pPresentModeCount is greater than the internal number of present modes, it is clamped to that many present modes. But if it is less than that, then pPresentModeCount elements are copied, and the call returns VK_INCOMPLETE. This fix is similar to (but simpler and more readable than) the one I provided in 750d8cad72a for vkGetPhysicalDeviceSurfaceFormatsKHR, which was suffering from the same problem. Reviewed-by: Eric Engestrom (cherry picked from commit b677b99db5c48ffd1eeef538b962080ac5fd65d9) Nominated-by: Emil Velikov --- src/vulkan/wsi/wsi_common_x11.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/vulkan/wsi/wsi_common_x11.c b/src/vulkan/wsi/wsi_common_x11.c index 6b7e133..09718eb 100644 --- a/src/vulkan/wsi/wsi_common_x11.c +++ b/src/vulkan/wsi/wsi_common_x11.c @@ -421,11 +421,11 @@ x11_surface_get_present_modes(VkIcdSurfaceBase *surface, return VK_SUCCESS; } - assert(*pPresentModeCount >= ARRAY_SIZE(present_modes)); + *pPresentModeCount = MIN2(*pPresentModeCount, ARRAY_SIZE(present_modes)); typed_memcpy(pPresentModes, present_modes, *pPresentModeCount); - *pPresentModeCount = ARRAY_SIZE(present_modes); - return VK_SUCCESS; + return *pPresentModeCount < ARRAY_SIZE(present_modes) ? + VK_INCOMPLETE : VK_SUCCESS; } VkResult wsi_create_xcb_surface(const VkAllocationCallbacks *pAllocator, -- cgit v1.1