summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/radeonsi/si_blit.c
diff options
context:
space:
mode:
authorMarek Olšák <marek.olsak@amd.com>2016-10-11 23:19:46 +0200
committerMarek Olšák <marek.olsak@amd.com>2016-10-13 19:00:51 +0200
commitd4d9ec55c589156df4edc227a86b4a8c41048d58 (patch)
tree646cdd6806f7a311c7e8a1403d5e715a79386af7 /src/gallium/drivers/radeonsi/si_blit.c
parenta077185ea9d685967844b68aa09da6bd8aa430da (diff)
downloadexternal_mesa3d-d4d9ec55c589156df4edc227a86b4a8c41048d58.zip
external_mesa3d-d4d9ec55c589156df4edc227a86b4a8c41048d58.tar.gz
external_mesa3d-d4d9ec55c589156df4edc227a86b4a8c41048d58.tar.bz2
radeonsi: implement TC-compatible HTILE
so that decompress blits aren't needed and depth texturing needs less memory bandwidth. Z16 and Z24 are promoted to Z32_FLOAT by the driver, because TC-compatible HTILE only supports Z32_FLOAT. This doubles memory footprint for Z16. The format promotion is not visible to state trackers. This is part of TC-compatible renderbuffer compression, which has 3 parts: DCC, HTILE, FMASK. Only TC-compatible FMASK compression is missing now. I don't see a measurable increase in performance though. (I tested Talos Principle and DiRT: Showdown, the latter is improved by 0.5%, which is almost noise, and it originally used layered Z16, so at least we know that Z16 promoted to Z32F isn't slower now) Tested-by: Edmondo Tommasina <edmondo.tommasina@gmail.com> Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_blit.c')
-rw-r--r--src/gallium/drivers/radeonsi/si_blit.c11
1 files changed, 9 insertions, 2 deletions
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index c143601..db41f56 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -332,6 +332,8 @@ si_flush_depth_texture(struct si_context *sctx,
}
}
+ assert(!tex->tc_compatible_htile || levels_z == 0);
+
/* We may have to allocate the flushed texture here when called from
* si_decompress_subresource.
*/
@@ -699,7 +701,10 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
zsbuf->u.tex.level == 0 &&
zsbuf->u.tex.first_layer == 0 &&
zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) {
- if (buffers & PIPE_CLEAR_DEPTH) {
+ /* TC-compatible HTILE only supports depth clears to 0 or 1. */
+ if (buffers & PIPE_CLEAR_DEPTH &&
+ (!zstex->tc_compatible_htile ||
+ depth == 0 || depth == 1)) {
/* Need to disable EXPCLEAR temporarily if clearing
* to a new value. */
if (!zstex->depth_cleared || zstex->depth_clear_value != depth) {
@@ -713,7 +718,9 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
- if (buffers & PIPE_CLEAR_STENCIL) {
+ /* TC-compatible HTILE only supports stencil clears to 0. */
+ if (buffers & PIPE_CLEAR_STENCIL &&
+ (!zstex->tc_compatible_htile || stencil == 0)) {
stencil &= 0xff;
/* Need to disable EXPCLEAR temporarily if clearing