summaryrefslogtreecommitdiffstats
path: root/src/gallium/winsys
diff options
context:
space:
mode:
authorMarek Olšák <marek.olsak@amd.com>2016-10-11 23:19:46 +0200
committerMarek Olšák <marek.olsak@amd.com>2016-10-13 19:00:51 +0200
commitd4d9ec55c589156df4edc227a86b4a8c41048d58 (patch)
tree646cdd6806f7a311c7e8a1403d5e715a79386af7 /src/gallium/winsys
parenta077185ea9d685967844b68aa09da6bd8aa430da (diff)
downloadexternal_mesa3d-d4d9ec55c589156df4edc227a86b4a8c41048d58.zip
external_mesa3d-d4d9ec55c589156df4edc227a86b4a8c41048d58.tar.gz
external_mesa3d-d4d9ec55c589156df4edc227a86b4a8c41048d58.tar.bz2
radeonsi: implement TC-compatible HTILE
so that decompress blits aren't needed and depth texturing needs less memory bandwidth. Z16 and Z24 are promoted to Z32_FLOAT by the driver, because TC-compatible HTILE only supports Z32_FLOAT. This doubles memory footprint for Z16. The format promotion is not visible to state trackers. This is part of TC-compatible renderbuffer compression, which has 3 parts: DCC, HTILE, FMASK. Only TC-compatible FMASK compression is missing now. I don't see a measurable increase in performance though. (I tested Talos Principle and DiRT: Showdown, the latter is improved by 0.5%, which is almost noise, and it originally used layered Z16, so at least we know that Z16 promoted to Z32F isn't slower now) Tested-by: Edmondo Tommasina <edmondo.tommasina@gmail.com> Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Diffstat (limited to 'src/gallium/winsys')
-rw-r--r--src/gallium/winsys/amdgpu/drm/amdgpu_surface.c57
1 files changed, 53 insertions, 4 deletions
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index 8bfea45..1bf07a7 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -137,6 +137,7 @@ ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws)
createFlags.value = 0;
createFlags.useTileIndex = 1;
createFlags.degradeBaseLevel = 1;
+ createFlags.useHtileSliceAlign = 1;
addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
addrCreateInput.chipFamily = ws->family;
@@ -160,7 +161,9 @@ static int compute_level(struct amdgpu_winsys *ws,
ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
- ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut)
+ ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut,
+ ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn,
+ ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut)
{
struct radeon_surf_level *surf_level;
ADDR_E_RETURNCODE ret;
@@ -257,6 +260,32 @@ static int compute_level(struct amdgpu_winsys *ws,
}
}
+ /* TC-compatible HTILE. */
+ if (!is_stencil &&
+ AddrSurfInfoIn->flags.depth &&
+ AddrSurfInfoIn->flags.tcCompatible &&
+ surf_level->mode == RADEON_SURF_MODE_2D &&
+ level == 0) {
+ AddrHtileIn->flags.tcCompatible = 1;
+ AddrHtileIn->pitch = AddrSurfInfoOut->pitch;
+ AddrHtileIn->height = AddrSurfInfoOut->height;
+ AddrHtileIn->numSlices = AddrSurfInfoOut->depth;
+ AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8;
+ AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8;
+ AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo;
+ AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex;
+ AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
+
+ ret = AddrComputeHtileInfo(ws->addrlib,
+ AddrHtileIn,
+ AddrHtileOut);
+
+ if (ret == ADDR_OK) {
+ surf->htile_size = AddrHtileOut->htileBytes;
+ surf->htile_alignment = AddrHtileOut->baseAlign;
+ }
+ }
+
return 0;
}
@@ -284,6 +313,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0};
ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0};
+ ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0};
+ ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0};
ADDR_TILEINFO AddrTileInfoIn = {0};
ADDR_TILEINFO AddrTileInfoOut = {0};
int r;
@@ -296,6 +327,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT);
AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT);
+ AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT);
+ AddrHtileOut.size = sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT);
AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;
type = RADEON_SURF_GET(surf->flags, TYPE);
@@ -361,7 +394,12 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
AddrSurfInfoIn.flags.cube = type == RADEON_SURF_TYPE_CUBEMAP;
AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0;
AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0;
- AddrSurfInfoIn.flags.degrade4Space = 1;
+ AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0;
+
+ /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been
+ * requested, because TC-compatible HTILE requires 2D tiling.
+ */
+ AddrSurfInfoIn.flags.degrade4Space = !AddrSurfInfoIn.flags.tcCompatible;
/* DCC notes:
* - If we add MSAA support, keep in mind that CB can't decompress 8bpp
@@ -443,11 +481,14 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
surf->bo_size = 0;
surf->dcc_size = 0;
surf->dcc_alignment = 1;
+ surf->htile_size = 0;
+ surf->htile_alignment = 1;
/* Calculate texture layout information. */
for (level = 0; level <= surf->last_level; level++) {
r = compute_level(ws, surf, false, level, type, compressed,
- &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut);
+ &AddrSurfInfoIn, &AddrSurfInfoOut,
+ &AddrDccIn, &AddrDccOut, &AddrHtileIn, &AddrHtileOut);
if (r)
return r;
@@ -475,12 +516,14 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
AddrSurfInfoIn.bpp = 8;
AddrSurfInfoIn.flags.depth = 0;
AddrSurfInfoIn.flags.stencil = 1;
+ AddrSurfInfoIn.flags.tcCompatible = 0;
/* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
AddrTileInfoIn.tileSplitBytes = surf->stencil_tile_split;
for (level = 0; level <= surf->last_level; level++) {
r = compute_level(ws, surf, true, level, type, compressed,
- &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut);
+ &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut,
+ NULL, NULL);
if (r)
return r;
@@ -508,6 +551,12 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
ws->info.num_tile_pipes);
}
+ /* Make sure HTILE covers the whole miptree, because the shader reads
+ * TC-compatible HTILE even for levels where it's disabled by DB.
+ */
+ if (surf->htile_size && surf->last_level)
+ surf->htile_size *= 2;
+
return 0;
}