summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/intel/vulkan/anv_cmd_buffer.c1
-rw-r--r--src/intel/vulkan/anv_genX.h2
-rw-r--r--src/intel/vulkan/anv_private.h40
-rw-r--r--src/intel/vulkan/gen7_cmd_buffer.c2
-rw-r--r--src/intel/vulkan/gen8_cmd_buffer.c2
-rw-r--r--src/intel/vulkan/genX_cmd_buffer.c163
6 files changed, 140 insertions, 70 deletions
diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c
index db7793e..25ab953 100644
--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -127,6 +127,7 @@ anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer)
state->dirty = 0;
state->vb_dirty = 0;
+ state->pending_pipe_bits = 0;
state->descriptors_dirty = 0;
state->push_constants_dirty = 0;
state->pipeline = NULL;
diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index a5ec27d..cf5a232 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -39,6 +39,8 @@ genX(cmd_buffer_alloc_null_surface_state)(struct anv_cmd_buffer *cmd_buffer,
void genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer,
struct anv_subpass *subpass);
+void genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer);
+
void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer);
void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer);
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index f6ce31d..b3774a8 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1066,6 +1066,45 @@ enum anv_cmd_dirty_bits {
};
typedef uint32_t anv_cmd_dirty_mask_t;
+enum anv_pipe_bits {
+ ANV_PIPE_DEPTH_CACHE_FLUSH_BIT = (1 << 0),
+ ANV_PIPE_STALL_AT_SCOREBOARD_BIT = (1 << 1),
+ ANV_PIPE_STATE_CACHE_INVALIDATE_BIT = (1 << 2),
+ ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT = (1 << 3),
+ ANV_PIPE_VF_CACHE_INVALIDATE_BIT = (1 << 4),
+ ANV_PIPE_DATA_CACHE_FLUSH_BIT = (1 << 5),
+ ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT = (1 << 10),
+ ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT = (1 << 11),
+ ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT = (1 << 12),
+ ANV_PIPE_DEPTH_STALL_BIT = (1 << 13),
+ ANV_PIPE_CS_STALL_BIT = (1 << 20),
+
+ /* This bit does not exist directly in PIPE_CONTROL. Instead it means that
+ * a flush has happened but not a CS stall. The next time we do any sort
+ * of invalidation we need to insert a CS stall at that time. Otherwise,
+ * we would have to CS stall on every flush which could be bad.
+ */
+ ANV_PIPE_NEEDS_CS_STALL_BIT = (1 << 21),
+};
+
+#define ANV_PIPE_FLUSH_BITS ( \
+ ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
+ ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
+ ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT)
+
+#define ANV_PIPE_STALL_BITS ( \
+ ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
+ ANV_PIPE_DEPTH_STALL_BIT | \
+ ANV_PIPE_CS_STALL_BIT)
+
+#define ANV_PIPE_INVALIDATE_BITS ( \
+ ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | \
+ ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | \
+ ANV_PIPE_VF_CACHE_INVALIDATE_BIT | \
+ ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
+ ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | \
+ ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT)
+
struct anv_vertex_binding {
struct anv_buffer * buffer;
VkDeviceSize offset;
@@ -1164,6 +1203,7 @@ struct anv_cmd_state {
uint32_t vb_dirty;
anv_cmd_dirty_mask_t dirty;
anv_cmd_dirty_mask_t compute_dirty;
+ enum anv_pipe_bits pending_pipe_bits;
uint32_t num_workgroups_offset;
struct anv_bo *num_workgroups_bo;
VkShaderStageFlags descriptors_dirty;
diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c
index 7b55133..331275e 100644
--- a/src/intel/vulkan/gen7_cmd_buffer.c
+++ b/src/intel/vulkan/gen7_cmd_buffer.c
@@ -307,6 +307,8 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
}
cmd_buffer->state.compute_dirty = 0;
+
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
}
void
diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c
index 065cf9e..547fedd 100644
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -391,6 +391,8 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
}
cmd_buffer->state.compute_dirty = 0;
+
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
}
void genX(CmdSetEvent)(
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 9f61a10..64172ca 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -136,6 +136,82 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
}
}
+void
+genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
+{
+ enum anv_pipe_bits bits = cmd_buffer->state.pending_pipe_bits;
+
+ /* Flushes are pipelined while invalidations are handled immediately.
+ * Therefore, if we're flushing anything then we need to schedule a stall
+ * before any invalidations can happen.
+ */
+ if (bits & ANV_PIPE_FLUSH_BITS)
+ bits |= ANV_PIPE_NEEDS_CS_STALL_BIT;
+
+ /* If we're going to do an invalidate and we have a pending CS stall that
+ * has yet to be resolved, we do the CS stall now.
+ */
+ if ((bits & ANV_PIPE_INVALIDATE_BITS) &&
+ (bits & ANV_PIPE_NEEDS_CS_STALL_BIT)) {
+ bits |= ANV_PIPE_CS_STALL_BIT;
+ bits &= ~ANV_PIPE_NEEDS_CS_STALL_BIT;
+ }
+
+ if (bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_CS_STALL_BIT)) {
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
+ pipe.DepthCacheFlushEnable = bits & ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
+ pipe.DCFlushEnable = bits & ANV_PIPE_DATA_CACHE_FLUSH_BIT;
+ pipe.RenderTargetCacheFlushEnable =
+ bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
+
+ pipe.DepthStallEnable = bits & ANV_PIPE_DEPTH_STALL_BIT;
+ pipe.CommandStreamerStallEnable = bits & ANV_PIPE_CS_STALL_BIT;
+ pipe.StallAtPixelScoreboard = bits & ANV_PIPE_STALL_AT_SCOREBOARD_BIT;
+
+ /*
+ * According to the Broadwell documentation, any PIPE_CONTROL with the
+ * "Command Streamer Stall" bit set must also have another bit set,
+ * with five different options:
+ *
+ * - Render Target Cache Flush
+ * - Depth Cache Flush
+ * - Stall at Pixel Scoreboard
+ * - Post-Sync Operation
+ * - Depth Stall
+ * - DC Flush Enable
+ *
+ * I chose "Stall at Pixel Scoreboard" since that's what we use in
+ * mesa and it seems to work fine. The choice is fairly arbitrary.
+ */
+ if ((bits & ANV_PIPE_CS_STALL_BIT) &&
+ !(bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_DEPTH_STALL_BIT |
+ ANV_PIPE_STALL_AT_SCOREBOARD_BIT)))
+ pipe.StallAtPixelScoreboard = true;
+ }
+
+ bits &= ~(ANV_PIPE_FLUSH_BITS | ANV_PIPE_CS_STALL_BIT);
+ }
+
+ if (bits & ANV_PIPE_INVALIDATE_BITS) {
+ anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
+ pipe.StateCacheInvalidationEnable =
+ bits & ANV_PIPE_STATE_CACHE_INVALIDATE_BIT;
+ pipe.ConstantCacheInvalidationEnable =
+ bits & ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
+ pipe.VFCacheInvalidationEnable =
+ bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
+ pipe.TextureCacheInvalidationEnable =
+ bits & ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
+ pipe.InstructionCacheInvalidateEnable =
+ bits & ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT;
+ }
+
+ bits &= ~ANV_PIPE_INVALIDATE_BITS;
+ }
+
+ cmd_buffer->state.pending_pipe_bits = bits;
+}
+
void genX(CmdPipelineBarrier)(
VkCommandBuffer commandBuffer,
VkPipelineStageFlags srcStageMask,
@@ -149,7 +225,7 @@ void genX(CmdPipelineBarrier)(
const VkImageMemoryBarrier* pImageMemoryBarriers)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
- uint32_t b, *dw;
+ uint32_t b;
/* XXX: Right now, we're really dumb and just flush whatever categories
* the app asks for. One of these days we may make this a bit better
@@ -173,105 +249,50 @@ void genX(CmdPipelineBarrier)(
dst_flags |= pImageMemoryBarriers[i].dstAccessMask;
}
- /* Mask out the Source access flags we care about */
- const uint32_t src_mask =
- VK_ACCESS_SHADER_WRITE_BIT |
- VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
- VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
- VK_ACCESS_TRANSFER_WRITE_BIT;
-
- src_flags = src_flags & src_mask;
-
- /* Mask out the destination access flags we care about */
- const uint32_t dst_mask =
- VK_ACCESS_INDIRECT_COMMAND_READ_BIT |
- VK_ACCESS_INDEX_READ_BIT |
- VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
- VK_ACCESS_UNIFORM_READ_BIT |
- VK_ACCESS_SHADER_READ_BIT |
- VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
- VK_ACCESS_TRANSFER_READ_BIT;
-
- dst_flags = dst_flags & dst_mask;
-
- /* The src flags represent how things were used previously. This is
- * what we use for doing flushes.
- */
- struct GENX(PIPE_CONTROL) flush_cmd = {
- GENX(PIPE_CONTROL_header),
- .PostSyncOperation = NoWrite,
- };
+ enum anv_pipe_bits pipe_bits = 0;
for_each_bit(b, src_flags) {
switch ((VkAccessFlagBits)(1 << b)) {
case VK_ACCESS_SHADER_WRITE_BIT:
- flush_cmd.DCFlushEnable = true;
+ pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
- flush_cmd.RenderTargetCacheFlushEnable = true;
+ pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
- flush_cmd.DepthCacheFlushEnable = true;
+ pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_TRANSFER_WRITE_BIT:
- flush_cmd.RenderTargetCacheFlushEnable = true;
- flush_cmd.DepthCacheFlushEnable = true;
+ pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
+ pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
break;
default:
- unreachable("should've masked this out by now");
+ break; /* Nothing to do */
}
}
- /* If we end up doing two PIPE_CONTROLs, the first, flusing one also has to
- * stall and wait for the flushing to finish, so we don't re-dirty the
- * caches with in-flight rendering after the second PIPE_CONTROL
- * invalidates.
- */
-
- if (dst_flags)
- flush_cmd.CommandStreamerStallEnable = true;
-
- if (src_flags && dst_flags) {
- dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length));
- GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &flush_cmd);
- }
-
- /* The dst flags represent how things will be used in the future. This
- * is what we use for doing cache invalidations.
- */
- struct GENX(PIPE_CONTROL) invalidate_cmd = {
- GENX(PIPE_CONTROL_header),
- .PostSyncOperation = NoWrite,
- };
-
for_each_bit(b, dst_flags) {
switch ((VkAccessFlagBits)(1 << b)) {
case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
case VK_ACCESS_INDEX_READ_BIT:
case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
- invalidate_cmd.VFCacheInvalidationEnable = true;
+ pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
break;
case VK_ACCESS_UNIFORM_READ_BIT:
- invalidate_cmd.ConstantCacheInvalidationEnable = true;
- /* fallthrough */
- case VK_ACCESS_SHADER_READ_BIT:
- invalidate_cmd.TextureCacheInvalidationEnable = true;
+ pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
+ pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
break;
+ case VK_ACCESS_SHADER_READ_BIT:
case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
- invalidate_cmd.TextureCacheInvalidationEnable = true;
- break;
case VK_ACCESS_TRANSFER_READ_BIT:
- invalidate_cmd.TextureCacheInvalidationEnable = true;
+ pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
break;
default:
- unreachable("should've masked this out by now");
+ break; /* Nothing to do */
}
}
- if (dst_flags) {
- dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length));
- GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &invalidate_cmd);
- }
+ cmd_buffer->state.pending_pipe_bits |= pipe_bits;
}
static void
@@ -511,6 +532,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
gen7_cmd_buffer_emit_scissor(cmd_buffer);
genX(cmd_buffer_flush_dynamic_state)(cmd_buffer);
+
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
}
static void