summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/radeonsi/si_state.c
diff options
context:
space:
mode:
authorMarek Olšák <marek.olsak@amd.com>2016-10-11 23:19:46 +0200
committerMarek Olšák <marek.olsak@amd.com>2016-10-13 19:00:51 +0200
commitd4d9ec55c589156df4edc227a86b4a8c41048d58 (patch)
tree646cdd6806f7a311c7e8a1403d5e715a79386af7 /src/gallium/drivers/radeonsi/si_state.c
parenta077185ea9d685967844b68aa09da6bd8aa430da (diff)
downloadexternal_mesa3d-d4d9ec55c589156df4edc227a86b4a8c41048d58.zip
external_mesa3d-d4d9ec55c589156df4edc227a86b4a8c41048d58.tar.gz
external_mesa3d-d4d9ec55c589156df4edc227a86b4a8c41048d58.tar.bz2
radeonsi: implement TC-compatible HTILE
so that decompress blits aren't needed and depth texturing needs less memory bandwidth. Z16 and Z24 are promoted to Z32_FLOAT by the driver, because TC-compatible HTILE only supports Z32_FLOAT. This doubles memory footprint for Z16. The format promotion is not visible to state trackers. This is part of TC-compatible renderbuffer compression, which has 3 parts: DCC, HTILE, FMASK. Only TC-compatible FMASK compression is missing now. I don't see a measurable increase in performance though. (I tested Talos Principle and DiRT: Showdown, the latter is improved by 0.5%, which is almost noise, and it originally used layered Z16, so at least we know that Z16 promoted to Z32F isn't slower now) Tested-by: Edmondo Tommasina <edmondo.tommasina@gmail.com> Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_state.c')
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c39
1 files changed, 35 insertions, 4 deletions
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index ad65fc2..b23749c 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -686,6 +686,9 @@ static void si_update_poly_offset_state(struct si_context *sctx)
if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf)
return;
+ /* Use the user format, not db_render_format, so that the polygon
+ * offset behaves as expected by applications.
+ */
switch (sctx->framebuffer.state.zsbuf->texture->format) {
case PIPE_FORMAT_Z16_UNORM:
si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]);
@@ -2140,7 +2143,7 @@ static void si_init_depth_surface(struct si_context *sctx,
uint64_t z_offs, s_offs;
uint32_t db_htile_data_base, db_htile_surface;
- format = si_translate_dbformat(rtex->resource.b.b.format);
+ format = si_translate_dbformat(rtex->db_render_format);
if (format == V_028040_Z_INVALID) {
R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
@@ -2151,7 +2154,7 @@ static void si_init_depth_surface(struct si_context *sctx,
z_offs += rtex->surface.level[level].offset;
s_offs += rtex->surface.stencil_level[level].offset;
- db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
+ db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile);
z_info = S_028040_FORMAT(format);
if (rtex->resource.b.b.nr_samples > 1) {
@@ -2208,13 +2211,37 @@ static void si_init_depth_surface(struct si_context *sctx,
*/
if (rtex->resource.b.b.nr_samples <= 1)
s_info |= S_028044_ALLOW_EXPCLEAR(1);
- } else
- /* Use all of the htile_buffer for depth if there's no stencil. */
+ } else if (!rtex->tc_compatible_htile) {
+ /* Use all of the htile_buffer for depth if there's no stencil.
+ * This must not be set when TC-compatible HTILE is enabled
+ * due to a hw bug.
+ */
s_info |= S_028044_TILE_STENCIL_DISABLE(1);
+ }
uint64_t va = rtex->htile_buffer->gpu_address;
db_htile_data_base = va >> 8;
db_htile_surface = S_028ABC_FULL_CACHE(1);
+
+ if (rtex->tc_compatible_htile) {
+ db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
+
+ switch (rtex->resource.b.b.nr_samples) {
+ case 0:
+ case 1:
+ z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
+ break;
+ case 2:
+ case 4:
+ z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
+ break;
+ case 8:
+ z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
+ break;
+ default:
+ assert(0);
+ }
+ }
} else {
db_htile_data_base = 0;
db_htile_surface = 0;
@@ -2356,6 +2383,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
if (state->zsbuf) {
surf = (struct r600_surface*)state->zsbuf;
+ rtex = (struct r600_texture*)surf->base.texture;
if (!surf->depth_initialized) {
si_init_depth_surface(sctx, surf);
@@ -3021,6 +3049,9 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
surflevel = tmp->surface.level;
if (tmp->db_compatible) {
+ if (!view->is_stencil_sampler)
+ pipe_format = tmp->db_render_format;
+
switch (pipe_format) {
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
pipe_format = PIPE_FORMAT_Z32_FLOAT;