From d4d9ec55c589156df4edc227a86b4a8c41048d58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 11 Oct 2016 23:19:46 +0200 Subject: radeonsi: implement TC-compatible HTILE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit so that decompress blits aren't needed and depth texturing needs less memory bandwidth. Z16 and Z24 are promoted to Z32_FLOAT by the driver, because TC-compatible HTILE only supports Z32_FLOAT. This doubles memory footprint for Z16. The format promotion is not visible to state trackers. This is part of TC-compatible renderbuffer compression, which has 3 parts: DCC, HTILE, FMASK. Only TC-compatible FMASK compression is missing now. I don't see a measurable increase in performance though. (I tested Talos Principle and DiRT: Showdown, the latter is improved by 0.5%, which is almost noise, and it originally used layered Z16, so at least we know that Z16 promoted to Z32F isn't slower now) Tested-by: Edmondo Tommasina Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeon/r600_pipe_common.h | 3 ++ src/gallium/drivers/radeon/r600_texture.c | 67 ++++++++++++++++++++++---- src/gallium/drivers/radeon/radeon_winsys.h | 4 ++ src/gallium/drivers/radeonsi/si_blit.c | 11 ++++- src/gallium/drivers/radeonsi/si_descriptors.c | 7 ++- src/gallium/drivers/radeonsi/si_shader.c | 18 ++++++- src/gallium/drivers/radeonsi/si_state.c | 39 +++++++++++++-- src/gallium/drivers/radeonsi/si_state_draw.c | 3 +- src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 57 ++++++++++++++++++++-- 9 files changed, 185 insertions(+), 24 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 290b228..5cfcad6 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -245,6 +245,7 @@ struct r600_htile_info { unsigned height; unsigned xalign; unsigned yalign; + unsigned alignment; }; struct r600_texture { @@ -252,6 +253,7 @@ struct r600_texture { uint64_t size; unsigned num_level0_transfers; + enum pipe_format db_render_format; bool is_depth; bool db_compatible; bool can_sample_z; @@ -273,6 +275,7 @@ struct r600_texture { /* Depth buffer compression and fast clear. */ struct r600_htile_info htile; struct r600_resource *htile_buffer; + bool tc_compatible_htile; bool depth_cleared; /* if it was cleared at least once */ float depth_clear_value; bool stencil_cleared; /* if it was cleared at least once */ diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index 57cdbcf..625d091 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -192,7 +192,8 @@ static int r600_init_surface(struct r600_common_screen *rscreen, struct radeon_surf *surface, const struct pipe_resource *ptex, unsigned array_mode, - bool is_flushed_depth) + bool is_flushed_depth, + bool tc_compatible_htile) { const struct util_format_description *desc = util_format_description(ptex->format); @@ -256,11 +257,22 @@ static int r600_init_surface(struct r600_common_screen *rscreen, if (!is_flushed_depth && is_depth) { surface->flags |= RADEON_SURF_ZBUFFER; + if (tc_compatible_htile && + array_mode == RADEON_SURF_MODE_2D) { + /* TC-compatible HTILE only supports Z32_FLOAT. + * Promote Z16 to Z32. DB->CB copies will convert + * the format for transfers. + */ + surface->bpe = 4; + surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE; + } + if (is_stencil) { surface->flags |= RADEON_SURF_SBUFFER | RADEON_SURF_HAS_SBUFFER_MIPTREE; } } + if (rscreen->chip_class >= SI) { surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX; } @@ -904,6 +916,7 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen, rtex->htile.height = height; rtex->htile.xalign = cl_width * 8; rtex->htile.yalign = cl_height * 8; + rtex->htile.alignment = base_align; return (util_max_layer(&rtex->resource.b.b, 0) + 1) * align(slice_bytes, base_align); @@ -912,21 +925,34 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen, static void r600_texture_allocate_htile(struct r600_common_screen *rscreen, struct r600_texture *rtex) { - unsigned htile_size = r600_texture_get_htile_size(rscreen, rtex); + uint64_t htile_size, alignment; + uint32_t clear_value; + + if (rtex->tc_compatible_htile) { + htile_size = rtex->surface.htile_size; + alignment = rtex->surface.htile_alignment; + clear_value = 0x0000030F; + } else { + htile_size = r600_texture_get_htile_size(rscreen, rtex); + alignment = rtex->htile.alignment; + clear_value = 0; + } if (!htile_size) return; rtex->htile_buffer = (struct r600_resource*) - pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM, - PIPE_USAGE_DEFAULT, htile_size); + r600_aligned_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM, + PIPE_USAGE_DEFAULT, + htile_size, alignment); if (rtex->htile_buffer == NULL) { /* this is not a fatal error as we can still keep rendering * without htile buffer */ R600_ERR("Failed to create buffer object for htile buffer.\n"); } else { - r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0, - htile_size, 0, R600_COHERENCY_NONE); + r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, + 0, htile_size, clear_value, + R600_COHERENCY_NONE); } } @@ -967,10 +993,11 @@ void r600_print_texture_info(struct r600_texture *rtex, FILE *f) if (rtex->htile_buffer) fprintf(f, " HTile: size=%u, alignment=%u, pitch=%u, height=%u, " - "xalign=%u, yalign=%u\n", + "xalign=%u, yalign=%u, TC_compatible = %u\n", rtex->htile_buffer->b.b.width0, rtex->htile_buffer->buf->alignment, rtex->htile.pitch, - rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign); + rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign, + rtex->tc_compatible_htile); if (rtex->dcc_offset) { fprintf(f, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%"PRIu64"\n", @@ -1054,6 +1081,16 @@ r600_texture_create_object(struct pipe_screen *screen, return NULL; } + rtex->tc_compatible_htile = rtex->surface.htile_size != 0; + assert(!!(rtex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) == + rtex->tc_compatible_htile); + + /* TC-compatible HTILE only supports Z32_FLOAT. */ + if (rtex->tc_compatible_htile) + rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT; + else + rtex->db_render_format = base->format; + /* Tiled depth textures utilize the non-displayable tile order. * This must be done after r600_setup_surface. * Applies to R600-Cayman. */ @@ -1241,11 +1278,20 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, { struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; struct radeon_surf surface = {0}; + bool is_flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH; + bool tc_compatible_htile = + rscreen->chip_class >= VI && + (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) && + !(rscreen->debug_flags & DBG_NO_HYPERZ) && + !is_flushed_depth && + templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */ + util_format_is_depth_or_stencil(templ->format); + int r; r = r600_init_surface(rscreen, &surface, templ, r600_choose_tiling(rscreen, templ), - templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH); + is_flushed_depth, tc_compatible_htile); if (r) { return NULL; } @@ -1296,7 +1342,8 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen else array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED; - r = r600_init_surface(rscreen, &surface, templ, array_mode, false); + r = r600_init_surface(rscreen, &surface, templ, array_mode, + false, false); if (r) { return NULL; } diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 7146737..8946209 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -278,6 +278,7 @@ enum radeon_feature_id { #define RADEON_SURF_HAS_TILE_MODE_INDEX (1 << 20) #define RADEON_SURF_FMASK (1 << 21) #define RADEON_SURF_DISABLE_DCC (1 << 22) +#define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23) #define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK) #define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT) @@ -344,6 +345,9 @@ struct radeon_surf { uint64_t dcc_size; uint64_t dcc_alignment; + /* TC-compatible HTILE only. */ + uint64_t htile_size; + uint64_t htile_alignment; }; struct radeon_bo_list_item { diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index c143601..db41f56 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -332,6 +332,8 @@ si_flush_depth_texture(struct si_context *sctx, } } + assert(!tex->tc_compatible_htile || levels_z == 0); + /* We may have to allocate the flushed texture here when called from * si_decompress_subresource. */ @@ -699,7 +701,10 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers, zsbuf->u.tex.level == 0 && zsbuf->u.tex.first_layer == 0 && zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) { - if (buffers & PIPE_CLEAR_DEPTH) { + /* TC-compatible HTILE only supports depth clears to 0 or 1. */ + if (buffers & PIPE_CLEAR_DEPTH && + (!zstex->tc_compatible_htile || + depth == 0 || depth == 1)) { /* Need to disable EXPCLEAR temporarily if clearing * to a new value. */ if (!zstex->depth_cleared || zstex->depth_clear_value != depth) { @@ -713,7 +718,9 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers, si_mark_atom_dirty(sctx, &sctx->db_render_state); } - if (buffers & PIPE_CLEAR_STENCIL) { + /* TC-compatible HTILE only supports stencil clears to 0. */ + if (buffers & PIPE_CLEAR_STENCIL && + (!zstex->tc_compatible_htile || stencil == 0)) { stencil &= 0xff; /* Need to disable EXPCLEAR temporarily if clearing diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 350242a..19cae65 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -399,6 +399,9 @@ void si_set_mutable_tex_desc_fields(struct r600_texture *tex, state[7] = ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) + tex->dcc_offset + base_level_info->dcc_offset) >> 8; + } else if (tex->tc_compatible_htile) { + state[6] |= S_008F28_COMPRESSION_EN(1); + state[7] = tex->htile_buffer->gpu_address >> 8; } } @@ -508,8 +511,10 @@ static void si_set_sampler_views(struct pipe_context *ctx, if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) { struct r600_texture *rtex = (struct r600_texture*)views[i]->texture; + struct si_sampler_view *rview = (struct si_sampler_view *)views[i]; - if (rtex->db_compatible) { + if (rtex->db_compatible && + (!rtex->tc_compatible_htile || rview->is_stencil_sampler)) { samplers->depth_texture_mask |= 1u << slot; } else { samplers->depth_texture_mask &= ~(1u << slot); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index fc50205..b2d7699 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -4607,12 +4607,26 @@ static void tex_fetch_args( /* Pack depth comparison value */ if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) { + LLVMValueRef z; + if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { - address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); + z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); } else { assert(ref_pos >= 0); - address[count++] = coords[ref_pos]; + z = coords[ref_pos]; } + + /* TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT, + * so the depth comparison value isn't clamped for Z16 and + * Z24 anymore. Do it manually here. + * + * It's unnecessary if the original texture format was + * Z32_FLOAT, but we don't know that here. + */ + if (ctx->screen->b.chip_class == VI) + z = radeon_llvm_saturate(bld_base, z); + + address[count++] = z; } /* Pack user derivatives */ diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index ad65fc2..b23749c 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -686,6 +686,9 @@ static void si_update_poly_offset_state(struct si_context *sctx) if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) return; + /* Use the user format, not db_render_format, so that the polygon + * offset behaves as expected by applications. + */ switch (sctx->framebuffer.state.zsbuf->texture->format) { case PIPE_FORMAT_Z16_UNORM: si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]); @@ -2140,7 +2143,7 @@ static void si_init_depth_surface(struct si_context *sctx, uint64_t z_offs, s_offs; uint32_t db_htile_data_base, db_htile_surface; - format = si_translate_dbformat(rtex->resource.b.b.format); + format = si_translate_dbformat(rtex->db_render_format); if (format == V_028040_Z_INVALID) { R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format); @@ -2151,7 +2154,7 @@ static void si_init_depth_surface(struct si_context *sctx, z_offs += rtex->surface.level[level].offset; s_offs += rtex->surface.stencil_level[level].offset; - db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1); + db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile); z_info = S_028040_FORMAT(format); if (rtex->resource.b.b.nr_samples > 1) { @@ -2208,13 +2211,37 @@ static void si_init_depth_surface(struct si_context *sctx, */ if (rtex->resource.b.b.nr_samples <= 1) s_info |= S_028044_ALLOW_EXPCLEAR(1); - } else - /* Use all of the htile_buffer for depth if there's no stencil. */ + } else if (!rtex->tc_compatible_htile) { + /* Use all of the htile_buffer for depth if there's no stencil. + * This must not be set when TC-compatible HTILE is enabled + * due to a hw bug. + */ s_info |= S_028044_TILE_STENCIL_DISABLE(1); + } uint64_t va = rtex->htile_buffer->gpu_address; db_htile_data_base = va >> 8; db_htile_surface = S_028ABC_FULL_CACHE(1); + + if (rtex->tc_compatible_htile) { + db_htile_surface |= S_028ABC_TC_COMPATIBLE(1); + + switch (rtex->resource.b.b.nr_samples) { + case 0: + case 1: + z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5); + break; + case 2: + case 4: + z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3); + break; + case 8: + z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2); + break; + default: + assert(0); + } + } } else { db_htile_data_base = 0; db_htile_surface = 0; @@ -2356,6 +2383,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, if (state->zsbuf) { surf = (struct r600_surface*)state->zsbuf; + rtex = (struct r600_texture*)surf->base.texture; if (!surf->depth_initialized) { si_init_depth_surface(sctx, surf); @@ -3021,6 +3049,9 @@ si_create_sampler_view_custom(struct pipe_context *ctx, surflevel = tmp->surface.level; if (tmp->db_compatible) { + if (!view->is_stencil_sampler) + pipe_format = tmp->db_render_format; + switch (pipe_format) { case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: pipe_format = PIPE_FORMAT_Z32_FLOAT; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index c14e852..d18137b 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -1118,7 +1118,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) struct pipe_surface *surf = sctx->framebuffer.state.zsbuf; struct r600_texture *rtex = (struct r600_texture *)surf->texture; - rtex->dirty_level_mask |= 1 << surf->u.tex.level; + if (!rtex->tc_compatible_htile) + rtex->dirty_level_mask |= 1 << surf->u.tex.level; if (rtex->surface.flags & RADEON_SURF_SBUFFER) rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c index 8bfea45..1bf07a7 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c @@ -137,6 +137,7 @@ ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws) createFlags.value = 0; createFlags.useTileIndex = 1; createFlags.degradeBaseLevel = 1; + createFlags.useHtileSliceAlign = 1; addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND; addrCreateInput.chipFamily = ws->family; @@ -160,7 +161,9 @@ static int compute_level(struct amdgpu_winsys *ws, ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn, ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut, ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn, - ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut) + ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut, + ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn, + ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut) { struct radeon_surf_level *surf_level; ADDR_E_RETURNCODE ret; @@ -257,6 +260,32 @@ static int compute_level(struct amdgpu_winsys *ws, } } + /* TC-compatible HTILE. */ + if (!is_stencil && + AddrSurfInfoIn->flags.depth && + AddrSurfInfoIn->flags.tcCompatible && + surf_level->mode == RADEON_SURF_MODE_2D && + level == 0) { + AddrHtileIn->flags.tcCompatible = 1; + AddrHtileIn->pitch = AddrSurfInfoOut->pitch; + AddrHtileIn->height = AddrSurfInfoOut->height; + AddrHtileIn->numSlices = AddrSurfInfoOut->depth; + AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8; + AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8; + AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo; + AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex; + AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex; + + ret = AddrComputeHtileInfo(ws->addrlib, + AddrHtileIn, + AddrHtileOut); + + if (ret == ADDR_OK) { + surf->htile_size = AddrHtileOut->htileBytes; + surf->htile_alignment = AddrHtileOut->baseAlign; + } + } + return 0; } @@ -284,6 +313,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0}; ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0}; ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0}; + ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0}; + ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0}; ADDR_TILEINFO AddrTileInfoIn = {0}; ADDR_TILEINFO AddrTileInfoOut = {0}; int r; @@ -296,6 +327,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT); AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT); AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT); + AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT); + AddrHtileOut.size = sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT); AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut; type = RADEON_SURF_GET(surf->flags, TYPE); @@ -361,7 +394,12 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, AddrSurfInfoIn.flags.cube = type == RADEON_SURF_TYPE_CUBEMAP; AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0; AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0; - AddrSurfInfoIn.flags.degrade4Space = 1; + AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0; + + /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been + * requested, because TC-compatible HTILE requires 2D tiling. + */ + AddrSurfInfoIn.flags.degrade4Space = !AddrSurfInfoIn.flags.tcCompatible; /* DCC notes: * - If we add MSAA support, keep in mind that CB can't decompress 8bpp @@ -443,11 +481,14 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, surf->bo_size = 0; surf->dcc_size = 0; surf->dcc_alignment = 1; + surf->htile_size = 0; + surf->htile_alignment = 1; /* Calculate texture layout information. */ for (level = 0; level <= surf->last_level; level++) { r = compute_level(ws, surf, false, level, type, compressed, - &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut); + &AddrSurfInfoIn, &AddrSurfInfoOut, + &AddrDccIn, &AddrDccOut, &AddrHtileIn, &AddrHtileOut); if (r) return r; @@ -475,12 +516,14 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, AddrSurfInfoIn.bpp = 8; AddrSurfInfoIn.flags.depth = 0; AddrSurfInfoIn.flags.stencil = 1; + AddrSurfInfoIn.flags.tcCompatible = 0; /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */ AddrTileInfoIn.tileSplitBytes = surf->stencil_tile_split; for (level = 0; level <= surf->last_level; level++) { r = compute_level(ws, surf, true, level, type, compressed, - &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut); + &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut, + NULL, NULL); if (r) return r; @@ -508,6 +551,12 @@ static int amdgpu_surface_init(struct radeon_winsys *rws, ws->info.num_tile_pipes); } + /* Make sure HTILE covers the whole miptree, because the shader reads + * TC-compatible HTILE even for levels where it's disabled by DB. + */ + if (surf->htile_size && surf->last_level) + surf->htile_size *= 2; + return 0; } -- cgit v1.1