summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/gen7_l3_state.c
diff options
context:
space:
mode:
author: Francisco Jerez <currojerez@riseup.net> 2016-01-14 11:22:46 -0800
committer: Francisco Jerez <currojerez@riseup.net> 2016-02-08 15:45:44 -0800
commit: 0aa4f99f562a05880a779707cbcd46be459863bf (patch)
tree: a9c02bfad7ae5798eb63aee57f3be178101e4ea7 /src/mesa/drivers/dri/i965/gen7_l3_state.c
parent: 1817e3c07a63c6404d6df323fbd443ccd9304b02 (diff)
download: external_mesa3d-0aa4f99f562a05880a779707cbcd46be459863bf.zip
external_mesa3d-0aa4f99f562a05880a779707cbcd46be459863bf.tar.gz
external_mesa3d-0aa4f99f562a05880a779707cbcd46be459863bf.tar.bz2
i965: Fix cache pollution race during L3 partitioning set-up.
We need to split the stalling flush from the RO cache invalidation into a different PIPE_CONTROL command to make sure that the top of the pipe invalidation happens after any previous rendering is complete. Otherwise it's possible for previous rendering to pollute the L3 cache in the short window of time between RO invalidation and the completion of the stalling flush.

Fixes rendering artifacts on Unigine Heaven, Metro Last Light Redux and Metro 2033 Redux.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93540
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93599
Tested-by: Darius Spitznagel <d.spitznagel@goodbytez.de>
Tested-by: Martin Peres <martin.peres@linux.intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Diffstat (limited to 'src/mesa/drivers/dri/i965/gen7_l3_state.c')
-rw-r--r-- src/mesa/drivers/dri/i965/gen7_l3_state.c 31
1 files changed, 23 insertions, 8 deletions
diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c
index b63e61c..85f18d0 100644
--- a/src/mesa/drivers/dri/i965/gen7_l3_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c
@@ -330,20 +330,35 @@ setup_l3_config(struct brw_context *brw, const struct brw_l3_config *cfg)
/* According to the hardware docs, the L3 partitioning can only be changed
* while the pipeline is completely drained and the caches are flushed,
- * which involves a first PIPE_CONTROL flush which stalls the pipeline and
- * initiates invalidation of the relevant caches...
+ * which involves a first PIPE_CONTROL flush which stalls the pipeline...
*/
brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
- PIPE_CONTROL_CONST_CACHE_INVALIDATE |
- PIPE_CONTROL_INSTRUCTION_INVALIDATE |
PIPE_CONTROL_DATA_CACHE_INVALIDATE |
PIPE_CONTROL_NO_WRITE |
PIPE_CONTROL_CS_STALL);
- /* ...followed by a second stalling flush which guarantees that
- * invalidation is complete when the L3 configuration registers are
- * modified.
+ /* ...followed by a second pipelined PIPE_CONTROL that initiates
+ * invalidation of the relevant caches. Note that because RO invalidation
+ * happens at the top of the pipeline (i.e. right away as the PIPE_CONTROL
+ * command is processed by the CS) we cannot combine it with the previous
+ * stalling flush as the hardware documentation suggests, because that
+ * would cause the CS to stall on previous rendering *after* RO
+ * invalidation and wouldn't prevent the RO caches from being polluted by
+ * concurrent rendering before the stall completes. This intentionally
+ * doesn't implement the SKL+ hardware workaround suggesting to enable CS
+ * stall on PIPE_CONTROLs with the texture cache invalidation bit set for
+ * GPGPU workloads because the previous and subsequent PIPE_CONTROLs
+ * already guarantee that there is no concurrent GPGPU kernel execution
+ * (see SKL HSD 2132585).
+ */
+ brw_emit_pipe_control_flush(brw,
+ PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+ PIPE_CONTROL_CONST_CACHE_INVALIDATE |
+ PIPE_CONTROL_INSTRUCTION_INVALIDATE |
+ PIPE_CONTROL_NO_WRITE);
+
+ /* Now send a third stalling flush to make sure that invalidation is
+ * complete when the L3 configuration registers are modified.
*/
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_DATA_CACHE_INVALIDATE |