From d4d9ec55c589156df4edc227a86b4a8c41048d58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 11 Oct 2016 23:19:46 +0200 Subject: radeonsi: implement TC-compatible HTILE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit so that decompress blits aren't needed and depth texturing needs less memory bandwidth. Z16 and Z24 are promoted to Z32_FLOAT by the driver, because TC-compatible HTILE only supports Z32_FLOAT. This doubles the memory footprint for Z16. The format promotion is not visible to state trackers. This is part of TC-compatible renderbuffer compression, which has 3 parts: DCC, HTILE, FMASK. Only TC-compatible FMASK compression is missing now. I don't see a measurable increase in performance, though. (I tested Talos Principle and DiRT: Showdown; the latter improved by 0.5%, which is almost noise. It originally used layered Z16, so at least we know that Z16 promoted to Z32F isn't slower now.) Tested-by: Edmondo Tommasina Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_descriptors.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/radeonsi/si_descriptors.c') diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 350242a..19cae65 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -399,6 +399,9 @@ void si_set_mutable_tex_desc_fields(struct r600_texture *tex, state[7] = ((!tex->dcc_separate_buffer ? 
tex->resource.gpu_address : 0) + tex->dcc_offset + base_level_info->dcc_offset) >> 8; + } else if (tex->tc_compatible_htile) { + state[6] |= S_008F28_COMPRESSION_EN(1); + state[7] = tex->htile_buffer->gpu_address >> 8; } } @@ -508,8 +511,10 @@ static void si_set_sampler_views(struct pipe_context *ctx, if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) { struct r600_texture *rtex = (struct r600_texture*)views[i]->texture; + struct si_sampler_view *rview = (struct si_sampler_view *)views[i]; - if (rtex->db_compatible) { + if (rtex->db_compatible && + (!rtex->tc_compatible_htile || rview->is_stencil_sampler)) { samplers->depth_texture_mask |= 1u << slot; } else { samplers->depth_texture_mask &= ~(1u << slot); -- cgit v1.1