summaryrefslogtreecommitdiffstats
path: root/src/gallium/winsys
diff options
context:
space:
mode:
authorNicolai Hähnle <nicolai.haehnle@amd.com>2016-09-07 10:50:14 +0200
committerNicolai Hähnle <nicolai.haehnle@amd.com>2016-09-12 13:54:59 +0200
commit11cbf4d7aea861e37067407ba7a660ea566c1593 (patch)
tree20eddeb7982c07dcde7ce6a40fafc8b4065a1cd1 /src/gallium/winsys
parent480ac143df281fef762e30f92bccc3b89674fef8 (diff)
downloadexternal_mesa3d-11cbf4d7aea861e37067407ba7a660ea566c1593.zip
external_mesa3d-11cbf4d7aea861e37067407ba7a660ea566c1593.tar.gz
external_mesa3d-11cbf4d7aea861e37067407ba7a660ea566c1593.tar.bz2
winsys/amdgpu: use only one fence per BO
The fence that is added to the BO during flush is guaranteed to be signaled after all the fences that were in the fences array of the BO before the flush, because those fences are added as dependencies for the submission (and all this happens atomically under the bo_fence_lock). Therefore, keeping only the last fence around is sufficient. Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Diffstat (limited to 'src/gallium/winsys')
-rw-r--r--src/gallium/winsys/amdgpu/drm/amdgpu_bo.c54
-rw-r--r--src/gallium/winsys/amdgpu/drm/amdgpu_bo.h4
-rw-r--r--src/gallium/winsys/amdgpu/drm/amdgpu_cs.c66
3 files changed, 56 insertions, 68 deletions
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 32df0be..a6d4aa4 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -44,7 +44,6 @@ static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
struct amdgpu_winsys *ws = bo->ws;
int64_t abs_timeout;
- int i;
if (timeout == 0) {
if (p_atomic_read(&bo->num_active_ioctls))
@@ -75,49 +74,42 @@ static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
if (timeout == 0) {
pipe_mutex_lock(ws->bo_fence_lock);
- for (i = 0; i < RING_LAST; i++)
- if (bo->fence[i]) {
- if (amdgpu_fence_wait(bo->fence[i], 0, false)) {
- /* Release the idle fence to avoid checking it again later. */
- amdgpu_fence_reference(&bo->fence[i], NULL);
- } else {
- pipe_mutex_unlock(ws->bo_fence_lock);
- return false;
- }
+ if (bo->fence) {
+ if (amdgpu_fence_wait(bo->fence, 0, false)) {
+ /* Release the idle fence to avoid checking it again later. */
+ amdgpu_fence_reference(&bo->fence, NULL);
+ } else {
+ pipe_mutex_unlock(ws->bo_fence_lock);
+ return false;
}
+ }
pipe_mutex_unlock(ws->bo_fence_lock);
return true;
} else {
- struct pipe_fence_handle *fence[RING_LAST] = {};
- bool fence_idle[RING_LAST] = {};
+ struct pipe_fence_handle *fence = NULL;
+ bool fence_idle = false;
bool buffer_idle = true;
- /* Take references to all fences, so that we can wait for them
+ /* Take a reference to the fences, so that we can wait for it
* without the lock. */
pipe_mutex_lock(ws->bo_fence_lock);
- for (i = 0; i < RING_LAST; i++)
- amdgpu_fence_reference(&fence[i], bo->fence[i]);
+ amdgpu_fence_reference(&fence, bo->fence);
pipe_mutex_unlock(ws->bo_fence_lock);
- /* Now wait for the fences. */
- for (i = 0; i < RING_LAST; i++) {
- if (fence[i]) {
- if (amdgpu_fence_wait(fence[i], abs_timeout, true))
- fence_idle[i] = true;
- else
- buffer_idle = false;
- }
+ /* Now wait for the fence. */
+ if (fence) {
+ if (amdgpu_fence_wait(fence, abs_timeout, true))
+ fence_idle = true;
+ else
+ buffer_idle = false;
}
/* Release idle fences to avoid checking them again later. */
pipe_mutex_lock(ws->bo_fence_lock);
- for (i = 0; i < RING_LAST; i++) {
- if (fence[i] == bo->fence[i] && fence_idle[i])
- amdgpu_fence_reference(&bo->fence[i], NULL);
-
- amdgpu_fence_reference(&fence[i], NULL);
- }
+ if (fence == bo->fence && fence_idle)
+ amdgpu_fence_reference(&bo->fence, NULL);
+ amdgpu_fence_reference(&fence, NULL);
pipe_mutex_unlock(ws->bo_fence_lock);
return buffer_idle;
@@ -133,7 +125,6 @@ static enum radeon_bo_domain amdgpu_bo_get_initial_domain(
void amdgpu_bo_destroy(struct pb_buffer *_buf)
{
struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
- int i;
pipe_mutex_lock(bo->ws->global_bo_list_lock);
LIST_DEL(&bo->global_list_item);
@@ -144,8 +135,7 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
amdgpu_va_range_free(bo->va_handle);
amdgpu_bo_free(bo->bo);
- for (i = 0; i < RING_LAST; i++)
- amdgpu_fence_reference(&bo->fence[i], NULL);
+ amdgpu_fence_reference(&bo->fence, NULL);
if (bo->initial_domain & RADEON_DOMAIN_VRAM)
bo->ws->allocated_vram -= align64(bo->base.size, bo->ws->info.gart_page_size);
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
index 70d9854..07403dd 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
@@ -62,8 +62,8 @@ struct amdgpu_winsys_bo {
*/
volatile int is_shared; /* bool (int for atomicity) */
- /* Fences for buffer synchronization. */
- struct pipe_fence_handle *fence[RING_LAST];
+ /* Fence for buffer synchronization. */
+ struct pipe_fence_handle *fence;
struct list_head global_list_item;
};
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 0bb916e..16dd45a 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -827,44 +827,42 @@ DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", false)
static void amdgpu_add_fence_dependencies(struct amdgpu_cs *acs)
{
struct amdgpu_cs_context *cs = acs->csc;
- int i, j;
+ int i;
cs->request.number_of_dependencies = 0;
for (i = 0; i < cs->num_buffers; i++) {
- for (j = 0; j < RING_LAST; j++) {
- struct amdgpu_cs_fence *dep;
- unsigned idx;
-
- struct amdgpu_fence *bo_fence = (void *)cs->buffers[i].bo->fence[j];
- if (!bo_fence)
- continue;
-
- if (bo_fence->ctx == acs->ctx &&
- bo_fence->fence.ip_type == cs->request.ip_type &&
- bo_fence->fence.ip_instance == cs->request.ip_instance &&
- bo_fence->fence.ring == cs->request.ring)
- continue;
-
- if (amdgpu_fence_wait((void *)bo_fence, 0, false))
- continue;
-
- if (bo_fence->submission_in_progress)
- os_wait_until_zero(&bo_fence->submission_in_progress,
- PIPE_TIMEOUT_INFINITE);
-
- idx = cs->request.number_of_dependencies++;
- if (idx >= cs->max_dependencies) {
- unsigned size;
-
- cs->max_dependencies = idx + 8;
- size = cs->max_dependencies * sizeof(struct amdgpu_cs_fence);
- cs->request.dependencies = realloc(cs->request.dependencies, size);
- }
-
- dep = &cs->request.dependencies[idx];
- memcpy(dep, &bo_fence->fence, sizeof(*dep));
+ struct amdgpu_cs_fence *dep;
+ unsigned idx;
+
+ struct amdgpu_fence *bo_fence = (void *)cs->buffers[i].bo->fence;
+ if (!bo_fence)
+ continue;
+
+ if (bo_fence->ctx == acs->ctx &&
+ bo_fence->fence.ip_type == cs->request.ip_type &&
+ bo_fence->fence.ip_instance == cs->request.ip_instance &&
+ bo_fence->fence.ring == cs->request.ring)
+ continue;
+
+ if (amdgpu_fence_wait((void *)bo_fence, 0, false))
+ continue;
+
+ if (bo_fence->submission_in_progress)
+ os_wait_until_zero(&bo_fence->submission_in_progress,
+ PIPE_TIMEOUT_INFINITE);
+
+ idx = cs->request.number_of_dependencies++;
+ if (idx >= cs->max_dependencies) {
+ unsigned size;
+
+ cs->max_dependencies = idx + 8;
+ size = cs->max_dependencies * sizeof(struct amdgpu_cs_fence);
+ cs->request.dependencies = realloc(cs->request.dependencies, size);
}
+
+ dep = &cs->request.dependencies[idx];
+ memcpy(dep, &bo_fence->fence, sizeof(*dep));
}
}
@@ -1054,7 +1052,7 @@ static int amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
amdgpu_add_fence_dependencies(cs);
for (i = 0; i < num_buffers; i++) {
p_atomic_inc(&cur->buffers[i].bo->num_active_ioctls);
- amdgpu_fence_reference(&cur->buffers[i].bo->fence[cs->ring_type],
+ amdgpu_fence_reference(&cur->buffers[i].bo->fence,
cur->fence);
}
pipe_mutex_unlock(ws->bo_fence_lock);