/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2014 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu
 */

#include "ilo_debug.h"
#include "ilo_image.h"

/* Bitmask form of the tiling modes, so that sets of tilings can be combined. */
enum {
   IMAGE_TILING_NONE = 1 << GEN6_TILING_NONE,
   IMAGE_TILING_X    = 1 << GEN6_TILING_X,
   IMAGE_TILING_Y    = 1 << GEN6_TILING_Y,
   IMAGE_TILING_W    = 1 << GEN8_TILING_W,

   IMAGE_TILING_ALL  = (IMAGE_TILING_NONE |
                        IMAGE_TILING_X |
                        IMAGE_TILING_Y |
                        IMAGE_TILING_W)
};

/*
 * Scratch state used while computing the layout of an image.  It is filled
 * in by image_get_gen6_layout() and consumed by the image_set_*() helpers.
 */
struct ilo_image_layout {
   enum ilo_image_walk_type walk;
   bool interleaved_samples;

   /* bitmask of IMAGE_TILING_x, and the tiling picked from it */
   uint8_t valid_tilings;
   enum gen_surface_tiling tiling;

   enum ilo_image_aux_type aux;

   /* horizontal and vertical alignments, in texels */
   int align_i;
   int align_j;

   /* per-LOD positions and slice sizes; storage is owned by the caller */
   struct ilo_image_lod *lods;
   int walk_layer_h0;
   int walk_layer_h1;
   int walk_layer_height;
   int monolithic_width;
   int monolithic_height;
};

/**
 * Return the walk type of a Gen6 image: 3D surfaces use
 * ILO_IMAGE_WALK_3D, R8_UINT images bound as depth/stencil use
 * ILO_IMAGE_WALK_LOD, and everything else uses ILO_IMAGE_WALK_LAYER.
 */
static enum ilo_image_walk_type
image_get_gen6_walk(const struct ilo_dev *dev,
                    const struct ilo_image_info *info)
{
   ILO_DEV_ASSERT(dev, 6, 6);

   /* TODO we want LODs to be page-aligned */
   if (info->type == GEN6_SURFTYPE_3D)
      return ILO_IMAGE_WALK_3D;

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 115:
    *
    *     "The separate stencil buffer does not support mip mapping, thus the
    *      storage for LODs other than LOD 0 is not needed. The following
    *      QPitch equation applies only to the separate stencil buffer:
    *
    *        QPitch = h_0"
    *
    * Use ILO_IMAGE_WALK_LOD and manually offset to the (page-aligned) levels
    * when bound.
    */
   if (info->bind_zs && info->format == GEN6_FORMAT_R8_UINT)
      return ILO_IMAGE_WALK_LOD;

   /* compact spacing is not supported otherwise */
   return ILO_IMAGE_WALK_LAYER;
}

/**
 * Return the walk type of a Gen7/Gen8 image: 3D surfaces use
 * ILO_IMAGE_WALK_3D; depth/stencil or mipmapped images use
 * ILO_IMAGE_WALK_LAYER; the rest use ILO_IMAGE_WALK_LOD.
 */
static enum ilo_image_walk_type
image_get_gen7_walk(const struct ilo_dev *dev,
                    const struct ilo_image_info *info)
{
   ILO_DEV_ASSERT(dev, 7, 8);

   if (info->type == GEN6_SURFTYPE_3D)
      return ILO_IMAGE_WALK_3D;

   /*
    * From the Ivy Bridge PRM, volume 1 part 1, page 111:
    *
    *     "note that the depth buffer and stencil buffer have an implied value
    *      of ARYSPC_FULL"
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 66:
    *
    *     "If Multisampled Surface Storage Format is MSFMT_MSS and Number of
    *      Multisamples is not MULTISAMPLECOUNT_1, this field (Surface Array
    *      Spacing) must be set to ARYSPC_LOD0."
    */
   if (info->sample_count > 1)
      assert(info->level_count == 1);

   return (info->bind_zs || info->level_count > 1) ?
      ILO_IMAGE_WALK_LAYER : ILO_IMAGE_WALK_LOD;
}

/**
 * Return true when samples are stored interleaved, which is the case on
 * Gen6 and for images bound as depth/stencil.
 */
static bool
image_get_gen6_interleaved_samples(const struct ilo_dev *dev,
                                   const struct ilo_image_info *info)
{
   ILO_DEV_ASSERT(dev, 6, 8);

   /*
    * Gen6 supports only interleaved samples.  It is not explicitly stated,
    * but on Gen7+, render targets are expected to be UMS/CMS (samples
    * non-interleaved) and depth/stencil buffers are expected to be IMS
    * (samples interleaved).
    *
    * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
    */
   return (ilo_dev_gen(dev) == ILO_GEN(6) || info->bind_zs);
}

/**
 * Return the bitmask of IMAGE_TILING_x values usable for the image, given
 * the caller's own restriction (info->valid_tilings, when non-zero) and the
 * bindings requested.  The result may be zero when the constraints conflict.
 */
static uint8_t
image_get_gen6_valid_tilings(const struct ilo_dev *dev,
                             const struct ilo_image_info *info)
{
   uint8_t valid_tilings = IMAGE_TILING_ALL;

   ILO_DEV_ASSERT(dev, 6, 8);

   if (info->valid_tilings)
      valid_tilings &= info->valid_tilings;

   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 32:
    *
    *     "Display/Overlay   Y-Major not supported.
    *                        X-Major required for Async Flips"
    */
   if (unlikely(info->bind_scanout))
      valid_tilings &= IMAGE_TILING_X;

   /*
    * From the Sandy Bridge PRM, volume 3 part 2, page 158:
    *
    *     "The cursor surface address must be 4K byte aligned. The cursor must
    *      be in linear memory, it cannot be tiled."
    */
   if (unlikely(info->bind_cursor))
      valid_tilings &= IMAGE_TILING_NONE;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 318:
    *
    *     "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
    *      Depth Buffer is not supported."
    *
    *     "The Depth Buffer, if tiled, must use Y-Major tiling."
    *
    * From the Sandy Bridge PRM, volume 1 part 2, page 22:
    *
    *     "W-Major Tile Format is used for separate stencil."
    */
   if (info->bind_zs) {
      if (info->format == GEN6_FORMAT_R8_UINT)
         valid_tilings &= IMAGE_TILING_W;
      else
         valid_tilings &= IMAGE_TILING_Y;
   }

   if (info->bind_surface_sampler ||
       info->bind_surface_dp_render ||
       info->bind_surface_dp_typed) {
      /*
       * From the Haswell PRM, volume 2d, page 233:
       *
       *     "If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
       *      (Tiled Surface) must be TRUE."
       */
      if (info->sample_count > 1)
         valid_tilings &= ~IMAGE_TILING_NONE;

      if (ilo_dev_gen(dev) < ILO_GEN(8))
         valid_tilings &= ~IMAGE_TILING_W;
   }

   if (info->bind_surface_dp_render) {
      /*
       * From the Sandy Bridge PRM, volume 1 part 2, page 32:
       *
       *     "NOTE: 128BPE Format Color buffer ( render target ) MUST be
       *      either TileX or Linear."
       *
       * From the Haswell PRM, volume 5, page 32:
       *
       *     "NOTE: 128 BPP format color buffer (render target) supports
       *      Linear, TiledX and TiledY."
       */
      if (ilo_dev_gen(dev) < ILO_GEN(7.5) && info->block_size == 16)
         valid_tilings &= ~IMAGE_TILING_Y;

      /*
       * From the Ivy Bridge PRM, volume 4 part 1, page 63:
       *
       *     "This field (Surface Vertical Aligment) must be set to VALIGN_4
       *      for all tiled Y Render Target surfaces."
       *
       *     "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
       *
       * R32G32B32_FLOAT is not renderable and we only need an assert() here.
       */
      if (ilo_dev_gen(dev) >= ILO_GEN(7) &&
          ilo_dev_gen(dev) <= ILO_GEN(7.5))
         assert(info->format != GEN6_FORMAT_R32G32B32_FLOAT);
   }

   return valid_tilings;
}

/**
 * Return a rough estimate, in bytes, of the storage needed by the image.
 * Alignment padding is ignored, and mipmapped images are approximated as 4/3
 * of the base level.
 *
 * The products are evaluated in 64 bits: the first operand is widened before
 * multiplying so that large images cannot wrap or overflow at the narrower
 * width of the info fields before the result is stored.
 */
static uint64_t
image_get_gen6_estimated_size(const struct ilo_dev *dev,
                              const struct ilo_image_info *info)
{
   /* padding not considered */
   const uint64_t slice_size = (uint64_t) info->width * info->height *
      info->block_size / (info->block_width * info->block_height);
   const uint64_t slice_count =
      (uint64_t) info->depth * info->array_size * info->sample_count;
   const uint64_t estimated_size = slice_size * slice_count;

   ILO_DEV_ASSERT(dev, 6, 8);

   if (info->level_count == 1)
      return estimated_size;
   else
      return estimated_size * 4 / 3;
}

/**
 * Pick one tiling out of \p valid_tilings.  When only a single tiling is
 * valid it is returned directly; otherwise linear is preferred for images
 * that gain nothing from tiling, then Y, then X.
 */
static enum gen_surface_tiling
image_get_gen6_tiling(const struct ilo_dev *dev,
                      const struct ilo_image_info *info,
                      uint8_t valid_tilings)
{
   ILO_DEV_ASSERT(dev, 6, 8);

   switch (valid_tilings) {
   case IMAGE_TILING_NONE:
      return GEN6_TILING_NONE;
   case IMAGE_TILING_X:
      return GEN6_TILING_X;
   case IMAGE_TILING_Y:
      return GEN6_TILING_Y;
   case IMAGE_TILING_W:
      return GEN8_TILING_W;
   default:
      break;
   }

   /*
    * X-tiling has the property that vertically adjacent pixels are usually in
    * the same page.  When the image size is less than a page, the image
    * height is 1, or when the image is not accessed in blocks, there is no
    * reason to tile.
    *
    * Y-tiling is similar, where vertically adjacent pixels are usually in the
    * same cacheline.
    */
   if (valid_tilings & IMAGE_TILING_NONE) {
      const uint64_t estimated_size =
         image_get_gen6_estimated_size(dev, info);

      if (info->height == 1 || !(info->bind_surface_sampler ||
                                 info->bind_surface_dp_render ||
                                 info->bind_surface_dp_typed))
         return GEN6_TILING_NONE;

      if (estimated_size <= 64 || (info->prefer_linear_threshold &&
               estimated_size > info->prefer_linear_threshold))
         return GEN6_TILING_NONE;

      if (estimated_size <= 2048)
         valid_tilings &= ~IMAGE_TILING_X;
   }

   return (valid_tilings & IMAGE_TILING_Y) ? GEN6_TILING_Y :
          (valid_tilings & IMAGE_TILING_X) ? GEN6_TILING_X :
          GEN6_TILING_NONE;
}

/**
 * Return true when a HiZ auxiliary buffer should be used: the image must be
 * a non-1D depth buffer without interleaved stencil, and HiZ must not be
 * disabled by the caller or by ILO_DEBUG_NOHIZ.
 */
static bool
image_get_gen6_hiz_enable(const struct ilo_dev *dev,
                          const struct ilo_image_info *info)
{
   ILO_DEV_ASSERT(dev, 6, 8);

   /* depth buffer? */
   if (!info->bind_zs ||
       info->format == GEN6_FORMAT_R8_UINT ||
       info->interleaved_stencil)
      return false;

   /* we want to be able to force 8x4 alignments */
   if (info->type == GEN6_SURFTYPE_1D)
      return false;

   if (info->aux_disable)
      return false;

   if (ilo_debug & ILO_DEBUG_NOHIZ)
      return false;

   return true;
}

/**
 * Return true when an MCS auxiliary buffer should be used.  It is mandatory
 * for multisampled sampler/render surfaces; for single-sampled images it is
 * used only for tiled, non-mipmapped, non-array render targets whose block
 * size is 4, 8 or 16 bytes, unless the caller disabled aux buffers.
 */
static bool
image_get_gen7_mcs_enable(const struct ilo_dev *dev,
                          const struct ilo_image_info *info,
                          enum gen_surface_tiling tiling)
{
   ILO_DEV_ASSERT(dev, 7, 8);

   if (!info->bind_surface_sampler && !info->bind_surface_dp_render)
      return false;

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 77:
    *
    *     "For Render Target and Sampling Engine Surfaces:If the surface is
    *      multisampled (Number of Multisamples any value other than
    *      MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
    *
    *     "This field must be set to 0 for all SINT MSRTs when all RT channels
    *      are not written"
    */
   if (info->sample_count > 1) {
      if (ilo_dev_gen(dev) < ILO_GEN(8))
         assert(!info->is_integer);
      return true;
   }

   if (info->aux_disable)
      return false;

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 326:
    *
    *     "When MCS is buffer is used for color clear of non-multisampler
    *      render target, the following restrictions apply.
    *      - Support is limited to tiled render targets.
    *      - Support is for non-mip-mapped and non-array surface types only.
    *      - Clear is supported only on the full RT; i.e., no partial clear or
    *        overlapping clears.
    *      - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
    *        64bpp and 128bpp.
    *      ..."
    *
    * How about SURFTYPE_3D?
    */
   if (!info->bind_surface_dp_render ||
       tiling == GEN6_TILING_NONE ||
       info->level_count > 1 ||
       info->array_size > 1)
      return false;

   switch (info->block_size) {
   case 4:
   case 8:
   case 16:
      return true;
   default:
      return false;
   }
}

/**
 * Compute the horizontal (\p align_i) and vertical (\p align_j) alignments,
 * in texels, of a Gen6 image.
 */
static void
image_get_gen6_alignments(const struct ilo_dev *dev,
                          const struct ilo_image_info *info,
                          int *align_i, int *align_j)
{
   ILO_DEV_ASSERT(dev, 6, 6);

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 113:
    *
    *     "surface format           align_i     align_j
    *      YUV 4:2:2 formats        4           *see below
    *      BC1-5                    4           4
    *      FXT1                     8           4
    *      all other formats        4           *see below"
    *
    *     "- align_j = 4 for any depth buffer
    *      - align_j = 2 for separate stencil buffer
    *      - align_j = 4 for any render target surface is multisampled (4x)
    *      - align_j = 4 for any render target surface with Surface Vertical
    *        Alignment = VALIGN_4
    *      - align_j = 2 for any render target surface with Surface Vertical
    *        Alignment = VALIGN_2
    *      - align_j = 2 for all other render target surface
    *      - align_j = 2 for any sampling engine surface with Surface Vertical
    *        Alignment = VALIGN_2
    *      - align_j = 4 for any sampling engine surface with Surface Vertical
    *        Alignment = VALIGN_4"
    *
    * From the Sandy Bridge PRM, volume 4 part 1, page 86:
    *
    *     "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
    *      the Surface Format is 96 bits per element (BPE)."
    *
    * They can be rephrased as
    *
    *                                  align_i        align_j
    *   compressed formats             block width    block height
    *   GEN6_FORMAT_R8_UINT            4              2
    *   other depth/stencil formats    4              4
    *   4x multisampled                4              4
    *   bpp 96                         4              2
    *   others                         4              2 or 4
    */

   *align_i = (info->compressed) ? info->block_width : 4;
   if (info->compressed) {
      *align_j = info->block_height;
   } else if (info->bind_zs) {
      *align_j = (info->format == GEN6_FORMAT_R8_UINT) ? 2 : 4;
   } else {
      *align_j = (info->sample_count > 1 || info->block_size != 12) ? 4 : 2;
   }
}

/**
 * Compute the horizontal (\p align_i) and vertical (\p align_j) alignments,
 * in texels, of a Gen7/Gen8 image with the given tiling.
 */
static void
image_get_gen7_alignments(const struct ilo_dev *dev,
                          const struct ilo_image_info *info,
                          enum gen_surface_tiling tiling,
                          int *align_i, int *align_j)
{
   int i, j;

   ILO_DEV_ASSERT(dev, 7, 8);

   /*
    * From the Ivy Bridge PRM, volume 1 part 1, page 110:
    *
    *     "surface defined by      surface format     align_i     align_j
    *      3DSTATE_DEPTH_BUFFER    D16_UNORM          8           4
    *                              not D16_UNORM      4           4
    *      3DSTATE_STENCIL_BUFFER  N/A                8           8
    *      SURFACE_STATE           BC*, ETC*, EAC*    4           4
    *                              FXT1               8           4
    *                              all others         (set by SURFACE_STATE)"
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 63:
    *
    *     "- This field (Surface Vertical Aligment) is intended to be set to
    *        VALIGN_4 if the surface was rendered as a depth buffer, for a
    *        multisampled (4x) render target, or for a multisampled (8x)
    *        render target, since these surfaces support only alignment of 4.
    *      - Use of VALIGN_4 for other surfaces is supported, but uses more
    *        memory.
    *      - This field must be set to VALIGN_4 for all tiled Y Render Target
    *        surfaces.
    *      - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
    *        YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
    *      - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
    *        must be set to VALIGN_4.
    *      - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
    *
    *     "- This field (Surface Horizontal Aligment) is intended to be set to
    *        HALIGN_8 only if the surface was rendered as a depth buffer with
    *        Z16 format or a stencil buffer, since these surfaces support only
    *        alignment of 8.
    *      - Use of HALIGN_8 for other surfaces is supported, but uses more
    *        memory.
    *      - This field must be set to HALIGN_4 if the Surface Format is BC*.
    *      - This field must be set to HALIGN_8 if the Surface Format is
    *        FXT1."
    *
    * They can be rephrased as
    *
    *                                  align_i        align_j
    *  compressed formats              block width    block height
    *  GEN6_FORMAT_R16_UNORM           8              4
    *  GEN6_FORMAT_R8_UINT             8              8
    *  other depth/stencil formats     4              4
    *  2x or 4x multisampled           4 or 8         4
    *  tiled Y                         4 or 8         4 (if rt)
    *  GEN6_FORMAT_R32G32B32_FLOAT     4 or 8         2
    *  others                          4 or 8         2 or 4
    */
   if (info->compressed) {
      i = info->block_width;
      j = info->block_height;
   } else if (info->bind_zs) {
      switch (info->format) {
      case GEN6_FORMAT_R16_UNORM:
         i = 8;
         j = 4;
         break;
      case GEN6_FORMAT_R8_UINT:
         i = 8;
         j = 8;
         break;
      default:
         i = 4;
         j = 4;
         break;
      }
   } else {
      const bool valign_4 =
         (info->sample_count > 1 ||
          ilo_dev_gen(dev) >= ILO_GEN(8) ||
          (tiling == GEN6_TILING_Y && info->bind_surface_dp_render));

      if (ilo_dev_gen(dev) < ILO_GEN(8) && valign_4)
         assert(info->format != GEN6_FORMAT_R32G32B32_FLOAT);

      i = 4;
      j = (valign_4) ? 4 : 2;
   }

   *align_i = i;
   *align_j = j;
}

/**
 * Fill in walk, sample interleaving, tiling, aux type and alignments of
 * \p layout for an image that follows the hardware layout rules.  Return
 * false when no tiling satisfies the constraints.
 */
static bool
image_init_gen6_hardware_layout(const struct ilo_dev *dev,
                                const struct ilo_image_info *info,
                                struct ilo_image_layout *layout)
{
   ILO_DEV_ASSERT(dev, 6, 8);

   if (ilo_dev_gen(dev) >= ILO_GEN(7))
      layout->walk = image_get_gen7_walk(dev, info);
   else
      layout->walk = image_get_gen6_walk(dev, info);

   layout->interleaved_samples =
      image_get_gen6_interleaved_samples(dev, info);

   layout->valid_tilings = image_get_gen6_valid_tilings(dev, info);
   if (!layout->valid_tilings)
      return false;

   layout->tiling = image_get_gen6_tiling(dev, info, layout->valid_tilings);

   if (image_get_gen6_hiz_enable(dev, info))
      layout->aux = ILO_IMAGE_AUX_HIZ;
   else if (ilo_dev_gen(dev) >= ILO_GEN(7) &&
            image_get_gen7_mcs_enable(dev, info, layout->tiling))
      layout->aux = ILO_IMAGE_AUX_MCS;
   else
      layout->aux = ILO_IMAGE_AUX_NONE;

   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
      image_get_gen7_alignments(dev, info, layout->tiling,
            &layout->align_i, &layout->align_j);
   } else {
      image_get_gen6_alignments(dev, info,
            &layout->align_i, &layout->align_j);
   }

   return true;
}

/**
 * Fill in \p layout with a compact linear layout: ILO_IMAGE_WALK_LOD, no
 * aux buffer, and alignments equal to the block size.  Always returns true.
 */
static bool
image_init_gen6_transfer_layout(const struct ilo_dev *dev,
                                const struct ilo_image_info *info,
                                struct ilo_image_layout *layout)
{
   ILO_DEV_ASSERT(dev, 6, 8);

   /* we can define our own layout to save space */
   layout->walk = ILO_IMAGE_WALK_LOD;
   layout->interleaved_samples = false;
   layout->valid_tilings = IMAGE_TILING_NONE;
   layout->tiling = GEN6_TILING_NONE;
   layout->aux = ILO_IMAGE_AUX_NONE;
   layout->align_i = info->block_width;
   layout->align_j = info->block_height;

   return true;
}

/**
 * Compute the padded size, in texels, of a single slice at \p level.  The
 * minified size is padded to block boundaries, expanded for interleaved
 * samples, and finally aligned to align_i/align_j of \p layout.
 */
static void
image_get_gen6_slice_size(const struct ilo_dev *dev,
                          const struct ilo_image_info *info,
                          const struct ilo_image_layout *layout,
                          uint8_t level,
                          int *width, int *height)
{
   int w, h;

   ILO_DEV_ASSERT(dev, 6, 8);

   w = u_minify(info->width, level);
   h = u_minify(info->height, level);

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 114:
    *
    *     "The dimensions of the mip maps are first determined by applying the
    *      sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
    *      if necessary, they are padded out to compression block boundaries."
    */
   w = align(w, info->block_width);
   h = align(h, info->block_height);

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 111:
    *
    *     "If the surface is multisampled (4x), these values must be adjusted
    *      as follows before proceeding:
    *
    *        W_L = ceiling(W_L / 2) * 4
    *        H_L = ceiling(H_L / 2) * 4"
    *
    * From the Ivy Bridge PRM, volume 1 part 1, page 108:
    *
    *     "If the surface is multisampled and it is a depth or stencil surface
    *      or Multisampled Surface StorageFormat in SURFACE_STATE is
    *      MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
    *      proceeding:
    *
    *        #samples  W_L =                    H_L =
    *        2         ceiling(W_L / 2) * 4     HL [no adjustment]
    *        4         ceiling(W_L / 2) * 4     ceiling(H_L / 2) * 4
    *        8         ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 4
    *        16        ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 8"
    *
    * For interleaved samples (4x), where pixels
    *
    *   (x, y  ) (x+1, y  )
    *   (x, y+1) (x+1, y+1)
    *
    * would be occupied by
    *
    *   (x, y  , si0) (x+1, y  , si0) (x, y  , si1) (x+1, y  , si1)
    *   (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
    *   (x, y  , si2) (x+1, y  , si2) (x, y  , si3) (x+1, y  , si3)
    *   (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
    *
    * Thus the need to
    *
    *   w = align(w, 2) * 2;
    *   h = align(h, 2) * 2;
    */
   if (layout->interleaved_samples) {
      switch (info->sample_count) {
      case 1:
         break;
      case 2:
         w = align(w, 2) * 2;
         break;
      case 4:
         w = align(w, 2) * 2;
         h = align(h, 2) * 2;
         break;
      case 8:
         w = align(w, 2) * 4;
         h = align(h, 2) * 2;
         break;
      case 16:
         w = align(w, 2) * 4;
         h = align(h, 2) * 4;
         break;
      default:
         assert(!"unsupported sample count");
         break;
      }
   }

   /*
    * From the Ivy Bridge PRM, volume 1 part 1, page 108:
    *
    *     "For separate stencil buffer, the width must be mutiplied by 2 and
    *      height divided by 2..."
    *
    * To make things easier (for transfer), we will just double the stencil
    * stride in 3DSTATE_STENCIL_BUFFER.
    */
   w = align(w, layout->align_i);
   h = align(h, layout->align_j);

   *width = w;
   *height = h;
}

/**
 * Return the number of layers: the array size, multiplied by the sample
 * count when samples are not interleaved.
 */
static int
image_get_gen6_layer_count(const struct ilo_dev *dev,
                           const struct ilo_image_info *info,
                           const struct ilo_image_layout *layout)
{
   int count = info->array_size;

   ILO_DEV_ASSERT(dev, 6, 8);

   /* samples of the same index are stored in a layer */
   if (!layout->interleaved_samples)
      count *= info->sample_count;

   return count;
}

/**
 * Set walk_layer_h0, walk_layer_h1 and walk_layer_height (QPitch, in texel
 * rows) of \p layout.  layout->lods must already hold the slice heights of
 * the existing levels.  walk_layer_height is 0 for single-layer images.
 */
static void
image_get_gen6_walk_layer_heights(const struct ilo_dev *dev,
                                  const struct ilo_image_info *info,
                                  struct ilo_image_layout *layout)
{
   ILO_DEV_ASSERT(dev, 6, 8);

   layout->walk_layer_h0 = layout->lods[0].slice_height;

   if (info->level_count > 1) {
      layout->walk_layer_h1 = layout->lods[1].slice_height;
   } else {
      /* level 1 does not exist; compute the height it would have had */
      int dummy;
      image_get_gen6_slice_size(dev, info, layout, 1,
            &dummy, &layout->walk_layer_h1);
   }

   if (image_get_gen6_layer_count(dev, info, layout) == 1) {
      layout->walk_layer_height = 0;
      return;
   }

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 115:
    *
    *     "The following equation is used for surface formats other than
    *      compressed textures:
    *
    *        QPitch = (h0 + h1 + 11j)"
    *
    *     "The equation for compressed textures (BC* and FXT1 surface formats)
    *      follows:
    *
    *        QPitch = (h0 + h1 + 11j) / 4"
    *
    *     "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
    *      value calculated in the equation above, for every other odd Surface
    *      Height starting from 1 i.e. 1,5,9,13"
    *
    * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
    *
    *     "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
    *      buffer and stencil buffer have an implied value of ARYSPC_FULL):
    *
    *        QPitch = (h0 + h1 + 12j)
    *        QPitch = (h0 + h1 + 12j) / 4 (compressed)
    *
    *      (There are many typos or missing words here...)"
    *
    * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
    * the base address.  The PRM divides QPitch by 4 for compressed formats
    * because the block height for those formats are 4, and it wants QPitch to
    * mean the number of memory rows, as opposed to texel rows, between
    * slices.  Since we use texel rows everywhere, we do not need to divide
    * QPitch by 4.
    */
   layout->walk_layer_height = layout->walk_layer_h0 + layout->walk_layer_h1 +
      ((ilo_dev_gen(dev) >= ILO_GEN(7)) ? 12 : 11) * layout->align_j;

   if (ilo_dev_gen(dev) == ILO_GEN(6) && info->sample_count > 1 &&
       info->height % 4 == 1)
      layout->walk_layer_height += 4;
}

/**
 * Set monolithic_width and monolithic_height of \p layout from the extent
 * (\p max_x, \p max_y) covered by all LODs and layers, adding the padding
 * required by the bindings and by HiZ.
 */
static void
image_get_gen6_monolithic_size(const struct ilo_dev *dev,
                               const struct ilo_image_info *info,
                               struct ilo_image_layout *layout,
                               int max_x, int max_y)
{
   int align_w = 1, align_h = 1, pad_h = 0;

   ILO_DEV_ASSERT(dev, 6, 8);

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 118:
    *
    *     "To determine the necessary padding on the bottom and right side of
    *      the surface, refer to the table in Section 7.18.3.4 for the i and j
    *      parameters for the surface format in use. The surface must then be
    *      extended to the next multiple of the alignment unit size in each
    *      dimension, and all texels contained in this extended surface must
    *      have valid GTT entries."
    *
    *     "For cube surfaces, an additional two rows of padding are required
    *      at the bottom of the surface. This must be ensured regardless of
    *      whether the surface is stored tiled or linear.  This is due to the
    *      potential rotation of cache line orientation from memory to cache."
    *
    *     "For compressed textures (BC* and FXT1 surface formats), padding at
    *      the bottom of the surface is to an even compressed row, which is
    *      equal to a multiple of 8 uncompressed texel rows. Thus, for padding
    *      purposes, these surfaces behave as if j = 8 only for surface
    *      padding purposes. The value of 4 for j still applies for mip level
    *      alignment and QPitch calculation."
    */
   if (info->bind_surface_sampler) {
      align_w = MAX2(align_w, layout->align_i);
      align_h = MAX2(align_h, layout->align_j);

      if (info->type == GEN6_SURFTYPE_CUBE)
         pad_h += 2;

      if (info->compressed)
         align_h = MAX2(align_h, layout->align_j * 2);
   }

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 118:
    *
    *     "If the surface contains an odd number of rows of data, a final row
    *      below the surface must be allocated."
    */
   if (info->bind_surface_dp_render)
      align_h = MAX2(align_h, 2);

   /*
    * Depth Buffer Clear/Resolve works in 8x4 sample blocks.  Pad to allow HiZ
    * for unaligned non-mipmapped and non-array images.
    */
   if (layout->aux == ILO_IMAGE_AUX_HIZ &&
       info->level_count == 1 &&
       info->array_size == 1 &&
       info->depth == 1) {
      align_w = MAX2(align_w, 8);
      align_h = MAX2(align_h, 4);
   }

   layout->monolithic_width = align(max_x, align_w);
   layout->monolithic_height = align(max_y + pad_h, align_h);
}

/**
 * Place every LOD according to layout->walk, recording its position and
 * slice size in layout->lods, then derive the layer heights and the
 * monolithic size of the image.
 */
static void
image_get_gen6_lods(const struct ilo_dev *dev,
                    const struct ilo_image_info *info,
                    struct ilo_image_layout *layout)
{
   const int layer_count = image_get_gen6_layer_count(dev, info, layout);
   int cur_x, cur_y, max_x, max_y;
   uint8_t lv;

   ILO_DEV_ASSERT(dev, 6, 8);

   cur_x = 0;
   cur_y = 0;
   max_x = 0;
   max_y = 0;
   for (lv = 0; lv < info->level_count; lv++) {
      int slice_w, slice_h, lod_w, lod_h;

      image_get_gen6_slice_size(dev, info, layout, lv, &slice_w, &slice_h);

      layout->lods[lv].x = cur_x;
      layout->lods[lv].y = cur_y;
      layout->lods[lv].slice_width = slice_w;
      layout->lods[lv].slice_height = slice_h;

      switch (layout->walk) {
      case ILO_IMAGE_WALK_LAYER:
         lod_w = slice_w;
         lod_h = slice_h;

         /* MIPLAYOUT_BELOW */
         if (lv == 1)
            cur_x += lod_w;
         else
            cur_y += lod_h;
         break;
      case ILO_IMAGE_WALK_LOD:
         lod_w = slice_w;
         lod_h = slice_h * layer_count;

         if (lv == 1)
            cur_x += lod_w;
         else
            cur_y += lod_h;

         /* every LOD begins at tile boundaries */
         if (info->level_count > 1) {
            assert(info->format == GEN6_FORMAT_R8_UINT);
            cur_x = align(cur_x, 64);
            cur_y = align(cur_y, 64);
         }
         break;
      case ILO_IMAGE_WALK_3D:
         {
            const int slice_count = u_minify(info->depth, lv);
            const int slice_count_per_row = 1 << lv;
            const int row_count =
               (slice_count + slice_count_per_row - 1) / slice_count_per_row;

            lod_w = slice_w * slice_count_per_row;
            lod_h = slice_h * row_count;
         }

         cur_y += lod_h;
         break;
      default:
         assert(!"unknown walk type");
         lod_w = 0;
         lod_h = 0;
         break;
      }

      if (max_x < layout->lods[lv].x + lod_w)
         max_x = layout->lods[lv].x + lod_w;
      if (max_y < layout->lods[lv].y + lod_h)
         max_y = layout->lods[lv].y + lod_h;
   }

   if (layout->walk == ILO_IMAGE_WALK_LAYER) {
      image_get_gen6_walk_layer_heights(dev, info, layout);
      if (layer_count > 1)
         max_y += layout->walk_layer_height * (layer_count - 1);
   } else {
      layout->walk_layer_h0 = 0;
      layout->walk_layer_h1 = 0;
      layout->walk_layer_height = 0;
   }

   image_get_gen6_monolithic_size(dev, info, layout, max_x, max_y);
}

/** Return true when any of the bind_* flags of \p info is set. */
static bool
image_bind_gpu(const struct ilo_image_info *info)
{
   return (info->bind_surface_sampler ||
           info->bind_surface_dp_render ||
           info->bind_surface_dp_typed ||
           info->bind_zs ||
           info->bind_scanout ||
           info->bind_cursor);
}

/**
 * Sanity-check \p info.  Violations are caught by assert() only; the
 * function itself always returns true.
 */
static bool
image_validate_gen6(const struct ilo_dev *dev,
                    const struct ilo_image_info *info)
{
   ILO_DEV_ASSERT(dev, 6, 8);

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 314:
    *
    *     "The separate stencil buffer is always enabled, thus the field in
    *      3DSTATE_DEPTH_BUFFER to explicitly enable the separate stencil
    *      buffer has been removed Surface formats with interleaved depth and
    *      stencil are no longer supported"
    */
   if (ilo_dev_gen(dev) >= ILO_GEN(7) && info->bind_zs)
      assert(!info->interleaved_stencil);

   return true;
}

/**
 * Compute the complete layout of an image into \p layout.  Images with a
 * bind flag set or with more than one level get the hardware layout; all
 * others get the compact transfer layout.  Return false on failure.
 */
static bool
image_get_gen6_layout(const struct ilo_dev *dev,
                      const struct ilo_image_info *info,
                      struct ilo_image_layout *layout)
{
   ILO_DEV_ASSERT(dev, 6, 8);

   if (!image_validate_gen6(dev, info))
      return false;

   if (image_bind_gpu(info) || info->level_count > 1) {
      if (!image_init_gen6_hardware_layout(dev, info, layout))
         return false;
   } else {
      if (!image_init_gen6_transfer_layout(dev, info, layout))
         return false;
   }

   /*
    * the fact that align i and j are multiples of block width and height
    * respectively is what makes the size of the bo a multiple of the block
    * size, slices start at block boundaries, and many of the computations
    * work.
    */
   assert(layout->align_i % info->block_width == 0);
   assert(layout->align_j % info->block_height == 0);

   /* make sure align() works */
   assert(util_is_power_of_two(layout->align_i) &&
          util_is_power_of_two(layout->align_j));
   assert(util_is_power_of_two(info->block_width) &&
          util_is_power_of_two(info->block_height));

   image_get_gen6_lods(dev, info, layout);

   assert(layout->walk_layer_height % info->block_height == 0);
   assert(layout->monolithic_width % info->block_width == 0);
   assert(layout->monolithic_height % info->block_height == 0);

   return true;
}

/**
 * Set img->bo_stride (in bytes) and img->bo_height (in block rows) from the
 * monolithic size, aligned for the tiling.  Return false when
 * info->force_bo_stride is set but is too small or misaligned.
 */
static bool
image_set_gen6_bo_size(struct ilo_image *img,
                       const struct ilo_dev *dev,
                       const struct ilo_image_info *info,
                       const struct ilo_image_layout *layout)
{
   int stride, height;
   int align_w, align_h;

   ILO_DEV_ASSERT(dev, 6, 8);

   stride = (layout->monolithic_width / info->block_width) * info->block_size;
   height = layout->monolithic_height / info->block_height;

   /*
    * From the Haswell PRM, volume 5, page 163:
    *
    *     "For linear surfaces, additional padding of 64 bytes is required
    *      at the bottom of the surface. This is in addition to the padding
    *      required above."
    */
   if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && info->bind_surface_sampler &&
       layout->tiling == GEN6_TILING_NONE)
      height += (64 + stride - 1) / stride;

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 81:
    *
    *     "- For linear render target surfaces, the pitch must be a multiple
    *        of the element size for non-YUV surface formats.  Pitch must be a
    *        multiple of 2 * element size for YUV surface formats.
    *
    *      - For other linear surfaces, the pitch can be any multiple of
    *        bytes.
    *      - For tiled surfaces, the pitch must be a multiple of the tile
    *        width."
    *
    * Different requirements may exist when the image is used in different
    * places, but our alignments here should be good enough that we do not
    * need to check info->bind_x.
    */
   switch (layout->tiling) {
   case GEN6_TILING_X:
      align_w = 512;
      align_h = 8;
      break;
   case GEN6_TILING_Y:
      align_w = 128;
      align_h = 32;
      break;
   case GEN8_TILING_W:
      /*
       * From the Sandy Bridge PRM, volume 1 part 2, page 22:
       *
       *     "A 4KB tile is subdivided into 8-high by 8-wide array of
       *      Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
       *      bytes."
       */
      align_w = 64;
      align_h = 64;
      break;
   default:
      assert(layout->tiling == GEN6_TILING_NONE);
      /* some good enough values */
      align_w = 64;
      align_h = 2;
      break;
   }

   if (info->force_bo_stride) {
      if (info->force_bo_stride % align_w || info->force_bo_stride < stride)
         return false;

      img->bo_stride = info->force_bo_stride;
   } else {
      img->bo_stride = align(stride, align_w);
   }

   img->bo_height = align(height, align_h);

   return true;
}

/**
 * Set up the HiZ auxiliary buffer of \p img: its size, the per-LOD offsets
 * or layer height depending on the walk, and the mask of levels on which
 * HiZ can be enabled.  Always returns true.
 */
static bool
image_set_gen6_hiz(struct ilo_image *img,
                   const struct ilo_dev *dev,
                   const struct ilo_image_info *info,
                   const struct ilo_image_layout *layout)
{
   const int hz_align_j = 8;
   enum ilo_image_walk_type hz_walk;
   int hz_width, hz_height;
   int hz_clear_w, hz_clear_h;
   uint8_t lv;

   ILO_DEV_ASSERT(dev, 6, 8);

   assert(layout->aux == ILO_IMAGE_AUX_HIZ);

   assert(layout->walk == ILO_IMAGE_WALK_LAYER ||
          layout->walk == ILO_IMAGE_WALK_3D);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 312:
    *
    *     "The hierarchical depth buffer does not support the LOD field, it is
    *      assumed by hardware to be zero. A separate hierarachical depth
    *      buffer is required for each LOD used, and the corresponding
    *      buffer's state delivered to hardware each time a new depth buffer
    *      state with modified LOD is delivered."
    *
    * We will put all LODs in a single bo with ILO_IMAGE_WALK_LOD.
    */
   if (ilo_dev_gen(dev) >= ILO_GEN(7))
      hz_walk = layout->walk;
   else
      hz_walk = ILO_IMAGE_WALK_LOD;

   /*
    * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
    * PRM, volume 2 part 1, page 312-313.
    *
    * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
    * memory row.
    */
   switch (hz_walk) {
   case ILO_IMAGE_WALK_LAYER:
      {
         const int h0 = align(layout->walk_layer_h0, hz_align_j);
         const int h1 = align(layout->walk_layer_h1, hz_align_j);
         const int htail =
            ((ilo_dev_gen(dev) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
         const int hz_qpitch = h0 + h1 + htail;

         hz_width = align(layout->lods[0].slice_width, 16);

         hz_height = hz_qpitch * info->array_size / 2;
         if (ilo_dev_gen(dev) >= ILO_GEN(7))
            hz_height = align(hz_height, 8);

         img->aux.walk_layer_height = hz_qpitch;
      }
      break;
   case ILO_IMAGE_WALK_LOD:
      {
         int lod_tx[ILO_IMAGE_MAX_LEVEL_COUNT];
         int lod_ty[ILO_IMAGE_MAX_LEVEL_COUNT];
         int cur_tx, cur_ty;

         /* figure out the tile offsets of LODs */
         hz_width = 0;
         hz_height = 0;
         cur_tx = 0;
         cur_ty = 0;
         for (lv = 0; lv < info->level_count; lv++) {
            int tw, th;

            lod_tx[lv] = cur_tx;
            lod_ty[lv] = cur_ty;

            tw = align(layout->lods[lv].slice_width, 16);
            th = align(layout->lods[lv].slice_height, hz_align_j) *
               info->array_size / 2;
            /* convert to Y-tiles */
            tw = (tw + 127) / 128;
            th = (th + 31) / 32;

            if (hz_width < cur_tx + tw)
               hz_width = cur_tx + tw;
            if (hz_height < cur_ty + th)
               hz_height = cur_ty + th;

            if (lv == 1)
               cur_tx += tw;
            else
               cur_ty += th;
         }

         /* convert tile offsets to memory offsets */
         for (lv = 0; lv < info->level_count; lv++) {
            img->aux.walk_lod_offsets[lv] =
               (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
         }

         hz_width *= 128;
         hz_height *= 32;
      }
      break;
   case ILO_IMAGE_WALK_3D:
      hz_width = align(layout->lods[0].slice_width, 16);

      hz_height = 0;
      for (lv = 0; lv < info->level_count; lv++) {
         const int h = align(layout->lods[lv].slice_height, hz_align_j);
         /* according to the formula, slices are packed together vertically */
         hz_height += h * u_minify(info->depth, lv);
      }
      hz_height /= 2;
      break;
   default:
      assert(!"unknown HiZ walk");
      hz_width = 0;
      hz_height = 0;
      break;
   }

   /*
    * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
    * Experiments on Haswell show that aligning the RECTLIST primitive and
    * 3DSTATE_DRAWING_RECTANGLE alone are not enough.  The LOD sizes must be
    * aligned.
    */
   hz_clear_w = 8;
   hz_clear_h = 4;
   switch (info->sample_count) {
   case 1:
   default:
      break;
   case 2:
      hz_clear_w /= 2;
      break;
   case 4:
      hz_clear_w /= 2;
      hz_clear_h /= 2;
      break;
   case 8:
      hz_clear_w /= 4;
      hz_clear_h /= 2;
      break;
   case 16:
      hz_clear_w /= 4;
      hz_clear_h /= 4;
      break;
   }

   /* enable HiZ on leading levels only, stopping at the first unaligned one */
   for (lv = 0; lv < info->level_count; lv++) {
      if (u_minify(info->width, lv) % hz_clear_w ||
          u_minify(info->height, lv) % hz_clear_h)
         break;
      img->aux.enables |= 1 << lv;
   }

   /* we padded to allow this in image_get_gen6_monolithic_size() */
   if (info->level_count == 1 && info->array_size == 1 && info->depth == 1)
      img->aux.enables |= 0x1;

   /* align to Y-tile */
   img->aux.bo_stride = align(hz_width, 128);
   img->aux.bo_height = align(hz_height, 32);

   return true;
}

/**
 * Set up the MCS auxiliary buffer of \p img: its size and the mask of
 * enabled levels.  Return false on an unsupported sample count or tiling.
 */
static bool
image_set_gen7_mcs(struct ilo_image *img,
                   const struct ilo_dev *dev,
                   const struct ilo_image_info *info,
                   const struct ilo_image_layout *layout)
{
   int mcs_width, mcs_height, mcs_cpp;
   int downscale_x, downscale_y;

   ILO_DEV_ASSERT(dev, 7, 8);

   assert(layout->aux == ILO_IMAGE_AUX_MCS);

   if (info->sample_count > 1) {
      /*
       * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
       * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA.  The
       * need of scale down could be that the clear rectangle is used to clear
       * the MCS instead of the RT.
       *
       * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT.  The
       * 2x2 factor could come from that the hardware writes 128 bits (an
       * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
       * the RT.  For 4X MSAA, we need 8 bits in MCS for every pixel in the
       * RT.  Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
       * pixel block in the RT.
       */
      switch (info->sample_count) {
      case 2:
      case 4:
         downscale_x = 8;
         downscale_y = 2;
         mcs_cpp = 1;
         break;
      case 8:
         downscale_x = 2;
         downscale_y = 2;
         mcs_cpp = 4;
         break;
      case 16:
         downscale_x = 2;
         downscale_y = 1;
         mcs_cpp = 8;
         break;
      default:
         assert(!"unsupported sample count");
         return false;
         break;
      }

      /*
       * It also appears that the 2x2 subspans generated by the scaled-down
       * clear rectangle cannot be masked.  The scale-down clear rectangle
       * thus must be aligned to 2x2, and we need to pad.
       */
      mcs_width = align(info->width, downscale_x * 2);
      mcs_height = align(info->height, downscale_y * 2);
   } else {
      /*
       * From the Ivy Bridge PRM, volume 2 part 1, page 327:
       *
       *     "              Pixels  Lines
       *      TiledY RT CL
       *          bpp
       *          32          8        4
       *          64          4        4
       *          128         2        4
       *
       *      TiledX RT CL
       *          bpp
       *          32          16       2
       *          64          8        2
       *          128         4        2"
       *
       * This table and the two following tables define the RT alignments, the
       * clear rectangle alignments, and the clear rectangle scale factors.
       * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
       * that the clear rectangle alignments are 16x32 blocks, and the clear
       * rectangle scale factors are 8x16 blocks.
       *
       * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
       * RT.  Similar to the MSAA cases, we can argue that an OWord maps to
       * 8x16 blocks.
       *
       * One problem with this reasoning is that a Y-tile in MCS has 8x32
       * OWords and maps to 64x512 128-byte blocks.  This differs from i965,
       * which says that a Y-tile maps to 128x256 blocks (\see
       * intel_get_non_msrt_mcs_alignment).  It does not really change
       * anything except for the size of the allocated MCS.  Let's see if we
       * hit out-of-bound access.
       */
      switch (layout->tiling) {
      case GEN6_TILING_X:
         downscale_x = 64 / info->block_size;
         downscale_y = 2;
         break;
      case GEN6_TILING_Y:
         downscale_x = 32 / info->block_size;
         downscale_y = 4;
         break;
      default:
         assert(!"unsupported tiling mode");
         return false;
         break;
      }

      downscale_x *= 8;
      downscale_y *= 16;

      /*
       * From the Haswell PRM, volume 7, page 652:
       *
       *     "Clear rectangle must be aligned to two times the number of
       *      pixels in the table shown below due to 16X16 hashing across the
       *      slice."
       *
       * The scaled-down clear rectangle must be aligned to 4x4 instead of
       * 2x2, and we need to pad.
       */
      mcs_width = align(info->width, downscale_x * 4) / downscale_x;
      mcs_height = align(info->height, downscale_y * 4) / downscale_y;
      mcs_cpp = 16; /* an OWord */
   }

   img->aux.enables = (1 << info->level_count) - 1;
   /* align to Y-tile */
   img->aux.bo_stride = align(mcs_width * mcs_cpp, 128);
   img->aux.bo_height = align(mcs_height, 32);

   return true;
}

bool
ilo_image_init(struct ilo_image *img,
               const struct ilo_dev *dev,
               const struct ilo_image_info *info)
{
   struct ilo_image_layout layout;

   assert(ilo_is_zeroed(img, sizeof(*img)));

   memset(&layout, 0, sizeof(layout));
   layout.lods = img->lods;

   if (!image_get_gen6_layout(dev, info, &layout))
      return false;

   img->type = info->type;

   img->format = info->format;
   img->block_width = info->block_width;
   img->block_height = info->block_height;
   img->block_size = info->block_size;

   img->width0 = info->width;
   img->height0 = info->height;
   img->depth0 = info->depth;
   img->array_size = info->array_size;
   img->level_count = info->level_count;
   img->sample_count = info->sample_count;

   img->walk = layout.walk;
   img->interleaved_samples = layout.interleaved_samples;

   img->tiling = layout.tiling;

   img->aux.type = layout.aux;

   img->align_i = layout.align_i;
   img->align_j = layout.align_j;

   img->walk_layer_height = layout.walk_layer_height;

   if (!image_set_gen6_bo_size(img, dev, info, &layout))
      return false;

   img->scanout = info->bind_scanout;

   switch (layout.aux) {
   case ILO_IMAGE_AUX_HIZ:
image_set_gen6_hiz(img, dev, info, &layout); break; case ILO_IMAGE_AUX_MCS: image_set_gen7_mcs(img, dev, info, &layout); break; default: break; } return true; }