anv: Move relocation handling from EndCommandBuffer to QueueSubmit

Ever since the early days of the Vulkan driver, we've been setting up the lists of relocations at EndCommandBuffer time. The idea behind this was to move some of the CPU load out of QueueSubmit which the client is required to lock around and into command buffer building which could be done in parallel. Then QueueSubmit basically just becomes a bunch of execbuf2 calls. Technically, this works. However, when you start to do more in QueueSubmit than just execbuf2, you start to run into problems. In particular, if a block pool is resized between EndCommandBuffer and QueueSubmit, the list of anv_bo's and the execbuf2 object list can get out of sync. This can cause problems if, for instance, you wanted to do relocations in userspace. Signed-off-by: Jason Ekstrand <jason@jlekstrand.net> Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com> Cc: "13.0" <mesa-stable@lists.freedesktop.org> (cherry picked from commit 8b61c57049ff75766715ad4f7b1ad2d3657b9b4d)
author: Jason Ekstrand <jason.ekstrand@intel.com> 2016-11-05 19:47:33 -0700
committer: Emil Velikov <emil.l.velikov@gmail.com> 2016-11-09 23:36:32 +0000
commit: 621b04873436e5875394cbe4d8733873e0c01d8e (patch)
tree: bb4d3b43c77672af77a022a394d58d493a78271b
parent: 039a03d8d2073d2f4234c02376e082e7f107d284 (diff)
download: external_mesa3d-621b04873436e5875394cbe4d8733873e0c01d8e.zip
external_mesa3d-621b04873436e5875394cbe4d8733873e0c01d8e.tar.gz
external_mesa3d-621b04873436e5875394cbe4d8733873e0c01d8e.tar.bz2
4 files changed, 76 insertions, 72 deletions
diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c
index 45cdb95..3493eeb 100644
--- a/src/intel/vulkan/anv_batch_chain.c
+++ b/src/intel/vulkan/anv_batch_chain.c
@@ -574,20 +574,6 @@ anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer)
    return VK_SUCCESS;
 }
 
-static void
-anv_execbuf_init(struct anv_execbuf *exec)
-{
-   memset(exec, 0, sizeof(*exec));
-}
-
-static void
-anv_execbuf_finish(struct anv_execbuf *exec,
-                   const VkAllocationCallbacks *alloc)
-{
-   vk_free(alloc, exec->objects);
-   vk_free(alloc, exec->bos);
-}
-
 VkResult
 anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
 {
@@ -635,8 +621,6 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
 
    anv_cmd_buffer_new_binding_table_block(cmd_buffer);
 
-   anv_execbuf_init(&cmd_buffer->execbuf2);
-
    return VK_SUCCESS;
 
  fail_bt_blocks:
@@ -668,8 +652,6 @@ anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
                             &cmd_buffer->batch_bos, link) {
       anv_batch_bo_destroy(bbo, cmd_buffer);
    }
-
-   anv_execbuf_finish(&cmd_buffer->execbuf2, &cmd_buffer->pool->alloc);
 }
 
 void
@@ -867,6 +849,31 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
                          &secondary->surface_relocs, 0);
 }
 
+struct anv_execbuf {
+   struct drm_i915_gem_execbuffer2           execbuf;
+
+   struct drm_i915_gem_exec_object2 *        objects;
+   uint32_t                                  bo_count;
+   struct anv_bo **                          bos;
+
+   /* Allocated length of the 'objects' and 'bos' arrays */
+   uint32_t                                  array_length;
+};
+
+static void
+anv_execbuf_init(struct anv_execbuf *exec)
+{
+   memset(exec, 0, sizeof(*exec));
+}
+
+static void
+anv_execbuf_finish(struct anv_execbuf *exec,
+                   const VkAllocationCallbacks *alloc)
+{
+   vk_free(alloc, exec->objects);
+   vk_free(alloc, exec->bos);
+}
+
 static VkResult
 anv_execbuf_add_bo(struct anv_execbuf *exec,
                    struct anv_bo *bo,
@@ -1037,20 +1044,20 @@ adjust_relocations_to_state_pool(struct anv_block_pool *pool,
    }
 }
 
-void
-anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer)
+VkResult
+anv_cmd_buffer_execbuf(struct anv_device *device,
+                       struct anv_cmd_buffer *cmd_buffer)
 {
    struct anv_batch *batch = &cmd_buffer->batch;
    struct anv_block_pool *ss_pool =
       &cmd_buffer->device->surface_state_block_pool;
 
-   cmd_buffer->execbuf2.bo_count = 0;
+   struct anv_execbuf execbuf;
+   anv_execbuf_init(&execbuf);
 
    adjust_relocations_from_state_pool(ss_pool, &cmd_buffer->surface_relocs,
                                       cmd_buffer->last_ss_pool_center);
-
-   anv_execbuf_add_bo(&cmd_buffer->execbuf2, &ss_pool->bo,
-                      &cmd_buffer->surface_relocs,
+   anv_execbuf_add_bo(&execbuf, &ss_pool->bo, &cmd_buffer->surface_relocs,
                       &cmd_buffer->pool->alloc);
 
    /* First, we walk over all of the bos we've seen and add them and their
@@ -1061,7 +1068,7 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer)
       adjust_relocations_to_state_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs,
                                        cmd_buffer->last_ss_pool_center);
 
-      anv_execbuf_add_bo(&cmd_buffer->execbuf2, &(*bbo)->bo, &(*bbo)->relocs,
+      anv_execbuf_add_bo(&execbuf, &(*bbo)->bo, &(*bbo)->relocs,
                          &cmd_buffer->pool->alloc);
    }
 
@@ -1079,20 +1086,19 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer)
     * corresponding to the first batch_bo in the chain with the last
     * element in the list.
     */
-   if (first_batch_bo->bo.index != cmd_buffer->execbuf2.bo_count - 1) {
+   if (first_batch_bo->bo.index != execbuf.bo_count - 1) {
       uint32_t idx = first_batch_bo->bo.index;
-      uint32_t last_idx = cmd_buffer->execbuf2.bo_count - 1;
+      uint32_t last_idx = execbuf.bo_count - 1;
 
-      struct drm_i915_gem_exec_object2 tmp_obj =
-         cmd_buffer->execbuf2.objects[idx];
-      assert(cmd_buffer->execbuf2.bos[idx] == &first_batch_bo->bo);
+      struct drm_i915_gem_exec_object2 tmp_obj = execbuf.objects[idx];
+      assert(execbuf.bos[idx] == &first_batch_bo->bo);
 
-      cmd_buffer->execbuf2.objects[idx] = cmd_buffer->execbuf2.objects[last_idx];
-      cmd_buffer->execbuf2.bos[idx] = cmd_buffer->execbuf2.bos[last_idx];
-      cmd_buffer->execbuf2.bos[idx]->index = idx;
+      execbuf.objects[idx] = execbuf.objects[last_idx];
+      execbuf.bos[idx] = execbuf.bos[last_idx];
+      execbuf.bos[idx]->index = idx;
 
-      cmd_buffer->execbuf2.objects[last_idx] = tmp_obj;
-      cmd_buffer->execbuf2.bos[last_idx] = &first_batch_bo->bo;
+      execbuf.objects[last_idx] = tmp_obj;
+      execbuf.bos[last_idx] = &first_batch_bo->bo;
       first_batch_bo->bo.index = last_idx;
    }
 
@@ -1113,9 +1119,9 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer)
       }
    }
 
-   cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) {
-      .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects,
-      .buffer_count = cmd_buffer->execbuf2.bo_count,
+   execbuf.execbuf = (struct drm_i915_gem_execbuffer2) {
+      .buffers_ptr = (uintptr_t) execbuf.objects,
+      .buffer_count = execbuf.bo_count,
       .batch_start_offset = 0,
       .batch_len = batch->next - batch->start,
       .cliprects_ptr = 0,
@@ -1127,12 +1133,7 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer)
       .rsvd1 = cmd_buffer->device->context_id,
       .rsvd2 = 0,
    };
-}
 
-VkResult
-anv_cmd_buffer_execbuf(struct anv_device *device,
-                       struct anv_cmd_buffer *cmd_buffer)
-{
    /* Since surface states are shared between command buffers and we don't
     * know what order they will be submitted to the kernel, we don't know what
     * address is actually written in the surface state object at any given
@@ -1142,6 +1143,9 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
    for (size_t i = 0; i < cmd_buffer->surface_relocs.num_relocs; i++)
       cmd_buffer->surface_relocs.relocs[i].presumed_offset = -1;
 
-   return anv_device_execbuf(device, &cmd_buffer->execbuf2.execbuf,
-                             cmd_buffer->execbuf2.bos);
+   VkResult result = anv_device_execbuf(device, &execbuf.execbuf, execbuf.bos);
+
+   anv_execbuf_finish(&execbuf, &cmd_buffer->pool->alloc);
+
+   return result;
 }
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index c40598c..c47961e 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -1097,6 +1097,27 @@ VkResult anv_QueueSubmit(
    struct anv_device *device = queue->device;
    VkResult result = VK_SUCCESS;
 
+   /* We lock around QueueSubmit for two main reasons:
+    *
+    *  1) When a block pool is resized, we create a new gem handle with a
+    *     different size and, in the case of surface states, possibly a
+    *     different center offset but we re-use the same anv_bo struct when
+    *     we do so.  If this happens in the middle of setting up an execbuf,
+    *     we could end up with our list of BOs out of sync with our list of
+    *     gem handles.
+    *
+    *  2) The algorithm we use for building the list of unique buffers isn't
+    *     thread-safe.  While the client is supposed to syncronize around
+    *     QueueSubmit, this would be extremely difficult to debug if it ever
+    *     came up in the wild due to a broken app.  It's better to play it
+    *     safe and just lock around QueueSubmit.
+    *
+    * Since the only other things that ever take the device lock such as block
+    * pool resize only rarely happen, this will almost never be contended so
+    * taking a lock isn't really an expensive operation in this case.
+    */
+   pthread_mutex_lock(&device->mutex);
+
    for (uint32_t i = 0; i < submitCount; i++) {
       for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
          ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer,
@@ -1105,7 +1126,7 @@ VkResult anv_QueueSubmit(
 
          result = anv_cmd_buffer_execbuf(device, cmd_buffer);
          if (result != VK_SUCCESS)
-            return result;
+            goto out;
       }
    }
 
@@ -1113,10 +1134,13 @@ VkResult anv_QueueSubmit(
       struct anv_bo *fence_bo = &fence->bo;
       result = anv_device_execbuf(device, &fence->execbuf, &fence_bo);
       if (result != VK_SUCCESS)
-         return result;
+         goto out;
    }
 
-   return VK_SUCCESS;
+out:
+   pthread_mutex_unlock(&device->mutex);
+
+   return result;
 }
 
 VkResult anv_QueueWaitIdle(
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 2d5d673..31b4766 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1141,17 +1141,6 @@ enum anv_cmd_buffer_exec_mode {
    ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN,
 };
 
-struct anv_execbuf {
-   struct drm_i915_gem_execbuffer2           execbuf;
-
-   struct drm_i915_gem_exec_object2 *        objects;
-   uint32_t                                  bo_count;
-   struct anv_bo **                          bos;
-
-   /* Allocated length of the 'objects' and 'bos' arrays */
-   uint32_t                                  array_length;
-};
-
 struct anv_cmd_buffer {
    VK_LOADER_DATA                               _loader_data;
 
@@ -1187,8 +1176,6 @@ struct anv_cmd_buffer {
    /** Last seen surface state block pool center bo offset */
    uint32_t                                     last_ss_pool_center;
 
-   struct anv_execbuf                           execbuf2;
-
    /* Serial for tracking buffer completion */
    uint32_t                                     serial;
 
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 24e0012..045cb9d 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -200,20 +200,9 @@ genX(EndCommandBuffer)(
     VkCommandBuffer                             commandBuffer)
 {
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
-   struct anv_device *device = cmd_buffer->device;
 
    anv_cmd_buffer_end_batch_buffer(cmd_buffer);
 
-   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
-      /* The algorithm used to compute the validate list is not threadsafe as
-       * it uses the bo->index field.  We have to lock the device around it.
-       * Fortunately, the chances for contention here are probably very low.
-       */
-      pthread_mutex_lock(&device->mutex);
-      anv_cmd_buffer_prepare_execbuf(cmd_buffer);
-      pthread_mutex_unlock(&device->mutex);
-   }
-
    return VK_SUCCESS;
 }
author	Jason Ekstrand <jason.ekstrand@intel.com>	2016-11-05 19:47:33 -0700
committer	Emil Velikov <emil.l.velikov@gmail.com>	2016-11-09 23:36:32 +0000
commit	621b04873436e5875394cbe4d8733873e0c01d8e (patch)
tree	bb4d3b43c77672af77a022a394d58d493a78271b
parent	039a03d8d2073d2f4234c02376e082e7f107d284 (diff)
download	external_mesa3d-621b04873436e5875394cbe4d8733873e0c01d8e.zip external_mesa3d-621b04873436e5875394cbe4d8733873e0c01d8e.tar.gz external_mesa3d-621b04873436e5875394cbe4d8733873e0c01d8e.tar.bz2