summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/radeon/r600_buffer_common.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/radeon/r600_buffer_common.c')
-rw-r--r-- src/gallium/drivers/radeon/r600_buffer_common.c 58
1 files changed, 26 insertions, 32 deletions
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index 90ca8cb..a7ecfb3 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -190,6 +190,17 @@ static void *r600_buffer_get_transfer(struct pipe_context *ctx,
return data;
}
+static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx,
+ unsigned dstx, unsigned srcx, unsigned size)
+{
+ bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4);
+ /* Decide whether a buffer copy described by dstx/srcx/size may be issued
+  * through rctx->dma_copy (presumably dstx/srcx are the destination and
+  * source byte offsets -- confirm against the callers in
+  * r600_buffer_transfer_map).
+  *
+  * The result is true when screen->has_cp_dma is set; no alignment
+  * condition is applied on that path. Otherwise dstx, srcx and size must
+  * all be multiples of 4, and either a DMA ring command stream
+  * (rings.dma.cs) or screen->has_streamout must be available. */
+ return rctx->screen->has_cp_dma ||
+ (dword_aligned && (rctx->rings.dma.cs ||
+ rctx->screen->has_streamout));
+ /* No fallback is attempted here; callers only test the returned flag. */
+}
+
static void *r600_buffer_transfer_map(struct pipe_context *ctx,
struct pipe_resource *resource,
unsigned level,
@@ -233,10 +244,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
!(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
- (rscreen->has_cp_dma ||
- (rscreen->has_streamout &&
- /* The buffer range must be aligned to 4 with streamout. */
- box->x % 4 == 0 && box->width % 4 == 0))) {
+ r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) {
assert(usage & PIPE_TRANSFER_WRITE);
/* Check if mapping this buffer would cause waiting for the GPU. */
@@ -260,10 +268,11 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
/* At this point, the buffer is always idle (we checked it above). */
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
}
- /* Using DMA for larger reads is much faster */
+ /* Using a staging buffer in GTT for larger reads is much faster. */
else if ((usage & PIPE_TRANSFER_READ) &&
!(usage & PIPE_TRANSFER_WRITE) &&
- (rbuffer->domains == RADEON_DOMAIN_VRAM)) {
+ rbuffer->domains == RADEON_DOMAIN_VRAM &&
+ r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) {
unsigned offset;
struct r600_resource *staging = NULL;
@@ -274,26 +283,16 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
if (staging) {
data += box->x % R600_MAP_BUFFER_ALIGNMENT;
- /* Copy the staging buffer into the original one. */
- if (rctx->dma_copy(ctx, (struct pipe_resource*)staging, 0,
- box->x % R600_MAP_BUFFER_ALIGNMENT,
- 0, 0, resource, level, box)) {
- rctx->rings.gfx.flush(rctx, 0);
- if (rctx->rings.dma.cs)
- rctx->rings.dma.flush(rctx, 0);
-
- /* Wait for any offloaded CS flush to complete
- * to avoid busy-waiting in the winsys. */
- rctx->ws->cs_sync_flush(rctx->rings.gfx.cs);
- if (rctx->rings.dma.cs)
- rctx->ws->cs_sync_flush(rctx->rings.dma.cs);
-
- rctx->ws->buffer_wait(staging->buf, RADEON_USAGE_WRITE);
- return r600_buffer_get_transfer(ctx, resource, level, usage, box,
- ptransfer, data, staging, offset);
- } else {
- pipe_resource_reference((struct pipe_resource**)&staging, NULL);
- }
+ /* Copy the VRAM buffer to the staging buffer. */
+ rctx->dma_copy(ctx, &staging->b.b, 0,
+ box->x % R600_MAP_BUFFER_ALIGNMENT,
+ 0, 0, resource, level, box);
+
+ /* Just do the synchronization. The buffer is mapped already. */
+ r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ);
+
+ return r600_buffer_get_transfer(ctx, resource, level, usage, box,
+ ptransfer, data, staging, offset);
}
}
@@ -329,12 +328,7 @@ static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
u_box_1d(soffset, size, &box);
/* Copy the staging buffer into the original one. */
- if (!(size % 4) && !(doffset % 4) && !(soffset % 4) &&
- rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box)) {
- /* DONE. */
- } else {
- ctx->resource_copy_region(ctx, dst, 0, doffset, 0, 0, src, 0, &box);
- }
+ rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box);
}
pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
}