/* * Mesa 3-D graphics library * * Copyright (C) 2012-2014 LunarG, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Authors: * Chia-I Wu */ #include "genhw/genhw.h" #include "util/u_dual_blend.h" #include "util/u_framebuffer.h" #include "util/u_half.h" #include "util/u_resource.h" #include "ilo_context.h" #include "ilo_format.h" #include "ilo_resource.h" #include "ilo_shader.h" #include "ilo_state.h" #include "ilo_state_3d.h" static void ve_init_cso(const struct ilo_dev_info *dev, const struct pipe_vertex_element *state, unsigned vb_index, struct ilo_ve_cso *cso) { int comp[4] = { GEN6_VFCOMP_STORE_SRC, GEN6_VFCOMP_STORE_SRC, GEN6_VFCOMP_STORE_SRC, GEN6_VFCOMP_STORE_SRC, }; int format; ILO_DEV_ASSERT(dev, 6, 7.5); switch (util_format_get_nr_components(state->src_format)) { case 1: comp[1] = GEN6_VFCOMP_STORE_0; case 2: comp[2] = GEN6_VFCOMP_STORE_0; case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ? GEN6_VFCOMP_STORE_1_INT : GEN6_VFCOMP_STORE_1_FP; } format = ilo_translate_vertex_format(dev, state->src_format); STATIC_ASSERT(Elements(cso->payload) >= 2); cso->payload[0] = vb_index << GEN6_VE_STATE_DW0_VB_INDEX__SHIFT | GEN6_VE_STATE_DW0_VALID | format << GEN6_VE_STATE_DW0_FORMAT__SHIFT | state->src_offset << GEN6_VE_STATE_DW0_VB_OFFSET__SHIFT; cso->payload[1] = comp[0] << GEN6_VE_STATE_DW1_COMP0__SHIFT | comp[1] << GEN6_VE_STATE_DW1_COMP1__SHIFT | comp[2] << GEN6_VE_STATE_DW1_COMP2__SHIFT | comp[3] << GEN6_VE_STATE_DW1_COMP3__SHIFT; } void ilo_gpe_init_ve(const struct ilo_dev_info *dev, unsigned num_states, const struct pipe_vertex_element *states, struct ilo_ve_state *ve) { unsigned i; ILO_DEV_ASSERT(dev, 6, 7.5); ve->count = num_states; ve->vb_count = 0; for (i = 0; i < num_states; i++) { const unsigned pipe_idx = states[i].vertex_buffer_index; const unsigned instance_divisor = states[i].instance_divisor; unsigned hw_idx; /* * map the pipe vb to the hardware vb, which has a fixed instance * divisor */ for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { if (ve->vb_mapping[hw_idx] == pipe_idx && ve->instance_divisors[hw_idx] == instance_divisor) break; } /* create one if there is no matching hardware vb */ if (hw_idx >= ve->vb_count) { hw_idx = ve->vb_count++; ve->vb_mapping[hw_idx] = pipe_idx; ve->instance_divisors[hw_idx] = instance_divisor; } ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]); } } void ilo_gpe_set_ve_edgeflag(const struct ilo_dev_info *dev, struct ilo_ve_cso *cso) { int format; ILO_DEV_ASSERT(dev, 6, 7.5); /* * From the Sandy Bridge PRM, volume 2 part 1, page 94: * * "- This bit (Edge Flag Enable) must only be ENABLED on the last * valid VERTEX_ELEMENT structure. * * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC, * and Component 1-3 Control must be set to VFCOMP_NOSTORE. * * - The Source Element Format must be set to the UINT format. * * - [DevSNB]: Edge Flags are not supported for QUADLIST * primitives. Software may elect to convert QUADLIST primitives * to some set of corresponding edge-flag-supported primitive * types (e.g., POLYGONs) prior to submission to the 3D pipeline." */ cso->payload[0] |= GEN6_VE_STATE_DW0_EDGE_FLAG_ENABLE; /* * Edge flags have format GEN6_FORMAT_R8_USCALED when defined via * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h. * * Since all the hardware cares about is whether the flags are zero or not, * we can treat them as the corresponding _UINT formats. */ format = GEN_EXTRACT(cso->payload[0], GEN6_VE_STATE_DW0_FORMAT); cso->payload[0] &= ~GEN6_VE_STATE_DW0_FORMAT__MASK; switch (format) { case GEN6_FORMAT_R32_FLOAT: format = GEN6_FORMAT_R32_UINT; break; case GEN6_FORMAT_R8_USCALED: format = GEN6_FORMAT_R8_UINT; break; default: break; } cso->payload[0] |= GEN_SHIFT32(format, GEN6_VE_STATE_DW0_FORMAT); cso->payload[1] = GEN6_VFCOMP_STORE_SRC << GEN6_VE_STATE_DW1_COMP0__SHIFT | GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP1__SHIFT | GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP2__SHIFT | GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP3__SHIFT; } void ilo_gpe_init_ve_nosrc(const struct ilo_dev_info *dev, int comp0, int comp1, int comp2, int comp3, struct ilo_ve_cso *cso) { ILO_DEV_ASSERT(dev, 6, 7.5); STATIC_ASSERT(Elements(cso->payload) >= 2); assert(comp0 != GEN6_VFCOMP_STORE_SRC && comp1 != GEN6_VFCOMP_STORE_SRC && comp2 != GEN6_VFCOMP_STORE_SRC && comp3 != GEN6_VFCOMP_STORE_SRC); cso->payload[0] = GEN6_VE_STATE_DW0_VALID; cso->payload[1] = comp0 << GEN6_VE_STATE_DW1_COMP0__SHIFT | comp1 << GEN6_VE_STATE_DW1_COMP1__SHIFT | comp2 << GEN6_VE_STATE_DW1_COMP2__SHIFT | comp3 << GEN6_VE_STATE_DW1_COMP3__SHIFT; } void ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev, const struct ilo_shader_state *vs, struct ilo_shader_cso *cso) { int start_grf, vue_read_len, sampler_count, max_threads; uint32_t dw2, dw4, dw5; ILO_DEV_ASSERT(dev, 6, 7.5); start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG); vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT); sampler_count = ilo_shader_get_kernel_param(vs, ILO_KERNEL_SAMPLER_COUNT); /* * From the Sandy Bridge PRM, volume 2 part 1, page 135: * * "(Vertex URB Entry Read Length) Specifies the number of pairs of * 128-bit vertex elements to be passed into the payload for each * vertex." * * "It is UNDEFINED to set this field to 0 indicating no Vertex URB * data to be read and passed to the thread." */ vue_read_len = (vue_read_len + 1) / 2; if (!vue_read_len) vue_read_len = 1; max_threads = dev->thread_count; if (ilo_dev_gen(dev) == ILO_GEN(7.5) && dev->gt == 2) max_threads *= 2; dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; dw4 = start_grf << GEN6_VS_DW4_URB_GRF_START__SHIFT | vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT | 0 << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT; dw5 = GEN6_VS_DW5_STATISTICS | GEN6_VS_DW5_VS_ENABLE; if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) dw5 |= (max_threads - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT; else dw5 |= (max_threads - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT; STATIC_ASSERT(Elements(cso->payload) >= 3); cso->payload[0] = dw2; cso->payload[1] = dw4; cso->payload[2] = dw5; } static void gs_init_cso_gen6(const struct ilo_dev_info *dev, const struct ilo_shader_state *gs, struct ilo_shader_cso *cso) { int start_grf, vue_read_len, max_threads; uint32_t dw2, dw4, dw5, dw6; ILO_DEV_ASSERT(dev, 6, 6); if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) { start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG); vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT); } else { start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO_START_REG); vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT); } /* * From the Sandy Bridge PRM, volume 2 part 1, page 153: * * "Specifies the amount of URB data read and passed in the thread * payload for each Vertex URB entry, in 256-bit register increments. * * It is UNDEFINED to set this field (Vertex URB Entry Read Length) to * 0 indicating no Vertex URB data to be read and passed to the * thread." */ vue_read_len = (vue_read_len + 1) / 2; if (!vue_read_len) vue_read_len = 1; /* * From the Sandy Bridge PRM, volume 2 part 1, page 154: * * "Maximum Number of Threads valid range is [0,27] when Rendering * Enabled bit is set." * * From the Sandy Bridge PRM, volume 2 part 1, page 173: * * "Programming Note: If the GS stage is enabled, software must always * allocate at least one GS URB Entry. This is true even if the GS * thread never needs to output vertices to the pipeline, e.g., when * only performing stream output. This is an artifact of the need to * pass the GS thread an initial destination URB handle." * * As such, we always enable rendering, and limit the number of threads. */ if (dev->gt == 2) { /* maximum is 60, but limited to 28 */ max_threads = 28; } else { /* maximum is 24, but limited to 21 (see brwCreateContext()) */ max_threads = 21; } dw2 = GEN6_THREADDISP_SPF; dw4 = vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT | 0 << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT | start_grf << GEN6_GS_DW4_URB_GRF_START__SHIFT; dw5 = (max_threads - 1) << GEN6_GS_DW5_MAX_THREADS__SHIFT | GEN6_GS_DW5_STATISTICS | GEN6_GS_DW5_SO_STATISTICS | GEN6_GS_DW5_RENDER_ENABLE; /* * we cannot make use of GEN6_GS_REORDER because it will reorder * triangle strips according to D3D rules (triangle 2N+1 uses vertices * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices * (2N+2, 2N+1, 2N+3)). */ dw6 = GEN6_GS_DW6_GS_ENABLE; if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY)) dw6 |= GEN6_GS_DW6_DISCARD_ADJACENCY; if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) { const uint32_t svbi_post_inc = ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC); dw6 |= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE; if (svbi_post_inc) { dw6 |= GEN6_GS_DW6_SVBI_POST_INC_ENABLE | svbi_post_inc << GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT; } } STATIC_ASSERT(Elements(cso->payload) >= 4); cso->payload[0] = dw2; cso->payload[1] = dw4; cso->payload[2] = dw5; cso->payload[3] = dw6; } static void gs_init_cso_gen7(const struct ilo_dev_info *dev, const struct ilo_shader_state *gs, struct ilo_shader_cso *cso) { int start_grf, vue_read_len, sampler_count, max_threads; uint32_t dw2, dw4, dw5; ILO_DEV_ASSERT(dev, 7, 7.5); start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG); vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT); sampler_count = ilo_shader_get_kernel_param(gs, ILO_KERNEL_SAMPLER_COUNT); /* in pairs */ vue_read_len = (vue_read_len + 1) / 2; switch (ilo_dev_gen(dev)) { case ILO_GEN(7.5): max_threads = (dev->gt >= 2) ? 256 : 70; break; case ILO_GEN(7): max_threads = (dev->gt == 2) ? 128 : 36; break; default: max_threads = 1; break; } dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; dw4 = vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT | GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES | 0 << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT | start_grf << GEN7_GS_DW4_URB_GRF_START__SHIFT; dw5 = (max_threads - 1) << GEN7_GS_DW5_MAX_THREADS__SHIFT | GEN7_GS_DW5_STATISTICS | GEN7_GS_DW5_GS_ENABLE; STATIC_ASSERT(Elements(cso->payload) >= 3); cso->payload[0] = dw2; cso->payload[1] = dw4; cso->payload[2] = dw5; } void ilo_gpe_init_gs_cso(const struct ilo_dev_info *dev, const struct ilo_shader_state *gs, struct ilo_shader_cso *cso) { if (ilo_dev_gen(dev) >= ILO_GEN(7)) gs_init_cso_gen7(dev, gs, cso); else gs_init_cso_gen6(dev, gs, cso); } static void view_init_null_gen6(const struct ilo_dev_info *dev, unsigned width, unsigned height, unsigned depth, unsigned level, struct ilo_view_surface *surf) { uint32_t *dw; ILO_DEV_ASSERT(dev, 6, 6); assert(width >= 1 && height >= 1 && depth >= 1); /* * From the Sandy Bridge PRM, volume 4 part 1, page 71: * * "A null surface will be used in instances where an actual surface is * not bound. When a write message is generated to a null surface, no * actual surface is written to. When a read message (including any * sampling engine message) is generated to a null surface, the result * is all zeros. Note that a null surface type is allowed to be used * with all messages, even if it is not specificially indicated as * supported. All of the remaining fields in surface state are ignored * for null surfaces, with the following exceptions: * * * [DevSNB+]: Width, Height, Depth, and LOD fields must match the * depth buffer's corresponding state for all render target * surfaces, including null. * * Surface Format must be R8G8B8A8_UNORM." * * From the Sandy Bridge PRM, volume 4 part 1, page 82: * * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be * true" */ STATIC_ASSERT(Elements(surf->payload) >= 6); dw = surf->payload; dw[0] = GEN6_SURFTYPE_NULL << GEN6_SURFACE_DW0_TYPE__SHIFT | GEN6_FORMAT_B8G8R8A8_UNORM << GEN6_SURFACE_DW0_FORMAT__SHIFT; dw[1] = 0; dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT | (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT | level << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT; dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT | GEN6_TILING_X; dw[4] = 0; dw[5] = 0; surf->bo = NULL; } static void view_init_for_buffer_gen6(const struct ilo_dev_info *dev, const struct ilo_buffer *buf, unsigned offset, unsigned size, unsigned struct_size, enum pipe_format elem_format, bool is_rt, bool render_cache_rw, struct ilo_view_surface *surf) { const int elem_size = util_format_get_blocksize(elem_format); int width, height, depth, pitch; int surface_format, num_entries; uint32_t *dw; ILO_DEV_ASSERT(dev, 6, 6); /* * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a * structure in a buffer. */ surface_format = ilo_translate_color_format(dev, elem_format); num_entries = size / struct_size; /* see if there is enough space to fit another element */ if (size % struct_size >= elem_size) num_entries++; /* * From the Sandy Bridge PRM, volume 4 part 1, page 76: * * "For SURFTYPE_BUFFER render targets, this field (Surface Base * Address) specifies the base address of first element of the * surface. The surface is interpreted as a simple array of that * single element type. The address must be naturally-aligned to the * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements * must be 16-byte aligned). * * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies * the base address of the first element of the surface, computed in * software by adding the surface base address to the byte offset of * the element in the buffer." */ if (is_rt) assert(offset % elem_size == 0); /* * From the Sandy Bridge PRM, volume 4 part 1, page 77: * * "For buffer surfaces, the number of entries in the buffer ranges * from 1 to 2^27." */ assert(num_entries >= 1 && num_entries <= 1 << 27); /* * From the Sandy Bridge PRM, volume 4 part 1, page 81: * * "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch) * indicates the size of the structure." */ pitch = struct_size; pitch--; num_entries--; /* bits [6:0] */ width = (num_entries & 0x0000007f); /* bits [19:7] */ height = (num_entries & 0x000fff80) >> 7; /* bits [26:20] */ depth = (num_entries & 0x07f00000) >> 20; STATIC_ASSERT(Elements(surf->payload) >= 6); dw = surf->payload; dw[0] = GEN6_SURFTYPE_BUFFER << GEN6_SURFACE_DW0_TYPE__SHIFT | surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT; if (render_cache_rw) dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW; dw[1] = offset; dw[2] = height << GEN6_SURFACE_DW2_HEIGHT__SHIFT | width << GEN6_SURFACE_DW2_WIDTH__SHIFT; dw[3] = depth << GEN6_SURFACE_DW3_DEPTH__SHIFT | pitch << GEN6_SURFACE_DW3_PITCH__SHIFT; dw[4] = 0; dw[5] = 0; /* do not increment reference count */ surf->bo = buf->bo; } static void view_init_for_texture_gen6(const struct ilo_dev_info *dev, const struct ilo_texture *tex, enum pipe_format format, unsigned first_level, unsigned num_levels, unsigned first_layer, unsigned num_layers, bool is_rt, struct ilo_view_surface *surf) { int surface_type, surface_format; int width, height, depth, pitch, lod; uint32_t *dw; ILO_DEV_ASSERT(dev, 6, 6); surface_type = ilo_gpe_gen6_translate_texture(tex->base.target); assert(surface_type != GEN6_SURFTYPE_BUFFER); if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8) format = PIPE_FORMAT_Z32_FLOAT; if (is_rt) surface_format = ilo_translate_render_format(dev, format); else surface_format = ilo_translate_texture_format(dev, format); assert(surface_format >= 0); width = tex->layout.width0; height = tex->layout.height0; depth = (tex->base.target == PIPE_TEXTURE_3D) ? tex->base.depth0 : num_layers; pitch = tex->layout.bo_stride; if (surface_type == GEN6_SURFTYPE_CUBE) { /* * From the Sandy Bridge PRM, volume 4 part 1, page 81: * * "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the * range of this field (Depth) is [0,84], indicating the number of * cube array elements (equal to the number of underlying 2D array * elements divided by 6). For other surfaces, this field must be * zero." * * When is_rt is true, we treat the texture as a 2D one to avoid the * restriction. */ if (is_rt) { surface_type = GEN6_SURFTYPE_2D; } else { assert(num_layers % 6 == 0); depth = num_layers / 6; } } /* sanity check the size */ assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1); switch (surface_type) { case GEN6_SURFTYPE_1D: assert(width <= 8192 && height == 1 && depth <= 512); assert(first_layer < 512 && num_layers <= 512); break; case GEN6_SURFTYPE_2D: assert(width <= 8192 && height <= 8192 && depth <= 512); assert(first_layer < 512 && num_layers <= 512); break; case GEN6_SURFTYPE_3D: assert(width <= 2048 && height <= 2048 && depth <= 2048); assert(first_layer < 2048 && num_layers <= 512); if (!is_rt) assert(first_layer == 0); break; case GEN6_SURFTYPE_CUBE: assert(width <= 8192 && height <= 8192 && depth <= 85); assert(width == height); assert(first_layer < 512 && num_layers <= 512); if (is_rt) assert(first_layer == 0); break; default: assert(!"unexpected surface type"); break; } /* non-full array spacing is supported only on GEN7+ */ assert(tex->layout.walk != ILO_LAYOUT_WALK_LOD); /* non-interleaved samples are supported only on GEN7+ */ if (tex->base.nr_samples > 1) assert(tex->layout.interleaved_samples); if (is_rt) { assert(num_levels == 1); lod = first_level; } else { lod = num_levels - 1; } /* * From the Sandy Bridge PRM, volume 4 part 1, page 76: * * "Linear render target surface base addresses must be element-size * aligned, for non-YUV surface formats, or a multiple of 2 * element-sizes for YUV surface formats. Other linear surfaces have * no alignment requirements (byte alignment is sufficient.)" * * From the Sandy Bridge PRM, volume 4 part 1, page 81: * * "For linear render target surfaces, the pitch must be a multiple * of the element size for non-YUV surface formats. Pitch must be a * multiple of 2 * element size for YUV surface formats." * * From the Sandy Bridge PRM, volume 4 part 1, page 86: * * "For linear surfaces, this field (X Offset) must be zero" */ if (tex->layout.tiling == INTEL_TILING_NONE) { if (is_rt) { const int elem_size = util_format_get_blocksize(format); assert(pitch % elem_size == 0); } } STATIC_ASSERT(Elements(surf->payload) >= 6); dw = surf->payload; dw[0] = surface_type << GEN6_SURFACE_DW0_TYPE__SHIFT | surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT | GEN6_SURFACE_DW0_MIPLAYOUT_BELOW; if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) { dw[0] |= 1 << 9 | GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK; } if (is_rt) dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW; dw[1] = 0; dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT | (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT | lod << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT; dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT | (pitch - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT | ilo_gpe_gen6_translate_winsys_tiling(tex->layout.tiling); dw[4] = first_level << GEN6_SURFACE_DW4_MIN_LOD__SHIFT | first_layer << 17 | (num_layers - 1) << 8 | ((tex->base.nr_samples > 1) ? GEN6_SURFACE_DW4_MULTISAMPLECOUNT_4 : GEN6_SURFACE_DW4_MULTISAMPLECOUNT_1); dw[5] = 0; assert(tex->layout.align_j == 2 || tex->layout.align_j == 4); if (tex->layout.align_j == 4) dw[5] |= GEN6_SURFACE_DW5_VALIGN_4; /* do not increment reference count */ surf->bo = tex->bo; } static void view_init_null_gen7(const struct ilo_dev_info *dev, unsigned width, unsigned height, unsigned depth, unsigned level, struct ilo_view_surface *surf) { uint32_t *dw; ILO_DEV_ASSERT(dev, 7, 7.5); assert(width >= 1 && height >= 1 && depth >= 1); /* * From the Ivy Bridge PRM, volume 4 part 1, page 62: * * "A null surface is used in instances where an actual surface is not * bound. When a write message is generated to a null surface, no * actual surface is written to. When a read message (including any * sampling engine message) is generated to a null surface, the result * is all zeros. Note that a null surface type is allowed to be used * with all messages, even if it is not specificially indicated as * supported. All of the remaining fields in surface state are ignored * for null surfaces, with the following exceptions: * * * Width, Height, Depth, LOD, and Render Target View Extent fields * must match the depth buffer's corresponding state for all render * target surfaces, including null. * * All sampling engine and data port messages support null surfaces * with the above behavior, even if not mentioned as specifically * supported, except for the following: * * Data Port Media Block Read/Write messages. * * The Surface Type of a surface used as a render target (accessed * via the Data Port's Render Target Write message) must be the same * as the Surface Type of all other render targets and of the depth * buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth * buffer or render targets are SURFTYPE_NULL." * * From the Ivy Bridge PRM, volume 4 part 1, page 65: * * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be * true" */ STATIC_ASSERT(Elements(surf->payload) >= 8); dw = surf->payload; dw[0] = GEN6_SURFTYPE_NULL << GEN7_SURFACE_DW0_TYPE__SHIFT | GEN6_FORMAT_B8G8R8A8_UNORM << GEN7_SURFACE_DW0_FORMAT__SHIFT | GEN6_TILING_X << 13; dw[1] = 0; dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) | GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH); dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH); dw[4] = 0; dw[5] = level; dw[6] = 0; dw[7] = 0; surf->bo = NULL; } static void view_init_for_buffer_gen7(const struct ilo_dev_info *dev, const struct ilo_buffer *buf, unsigned offset, unsigned size, unsigned struct_size, enum pipe_format elem_format, bool is_rt, bool render_cache_rw, struct ilo_view_surface *surf) { const bool typed = (elem_format != PIPE_FORMAT_NONE); const bool structured = (!typed && struct_size > 1); const int elem_size = (typed) ? util_format_get_blocksize(elem_format) : 1; int width, height, depth, pitch; int surface_type, surface_format, num_entries; uint32_t *dw; ILO_DEV_ASSERT(dev, 7, 7.5); surface_type = (structured) ? GEN7_SURFTYPE_STRBUF : GEN6_SURFTYPE_BUFFER; surface_format = (typed) ? ilo_translate_color_format(dev, elem_format) : GEN6_FORMAT_RAW; num_entries = size / struct_size; /* see if there is enough space to fit another element */ if (size % struct_size >= elem_size && !structured) num_entries++; /* * From the Ivy Bridge PRM, volume 4 part 1, page 67: * * "For SURFTYPE_BUFFER render targets, this field (Surface Base * Address) specifies the base address of first element of the * surface. The surface is interpreted as a simple array of that * single element type. The address must be naturally-aligned to the * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements * must be 16-byte aligned) * * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies * the base address of the first element of the surface, computed in * software by adding the surface base address to the byte offset of * the element in the buffer." */ if (is_rt) assert(offset % elem_size == 0); /* * From the Ivy Bridge PRM, volume 4 part 1, page 68: * * "For typed buffer and structured buffer surfaces, the number of * entries in the buffer ranges from 1 to 2^27. For raw buffer * surfaces, the number of entries in the buffer is the number of * bytes which can range from 1 to 2^30." */ assert(num_entries >= 1 && num_entries <= 1 << ((typed || structured) ? 27 : 30)); /* * From the Ivy Bridge PRM, volume 4 part 1, page 69: * * "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be * 11 if the Surface Format is RAW (the size of the buffer must be a * multiple of 4 bytes)." * * From the Ivy Bridge PRM, volume 4 part 1, page 70: * * "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this * field (Surface Pitch) indicates the size of the structure." * * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch * must be a multiple of 4 bytes." */ if (structured) assert(struct_size % 4 == 0); else if (!typed) assert(num_entries % 4 == 0); pitch = struct_size; pitch--; num_entries--; /* bits [6:0] */ width = (num_entries & 0x0000007f); /* bits [20:7] */ height = (num_entries & 0x001fff80) >> 7; /* bits [30:21] */ depth = (num_entries & 0x7fe00000) >> 21; /* limit to [26:21] */ if (typed || structured) depth &= 0x3f; STATIC_ASSERT(Elements(surf->payload) >= 8); dw = surf->payload; dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT | surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT; if (render_cache_rw) dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW; dw[1] = offset; dw[2] = GEN_SHIFT32(height, GEN7_SURFACE_DW2_HEIGHT) | GEN_SHIFT32(width, GEN7_SURFACE_DW2_WIDTH); dw[3] = GEN_SHIFT32(depth, GEN7_SURFACE_DW3_DEPTH) | pitch; dw[4] = 0; dw[5] = 0; dw[6] = 0; dw[7] = 0; if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { dw[7] |= GEN_SHIFT32(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) | GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) | GEN_SHIFT32(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) | GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A); } /* do not increment reference count */ surf->bo = buf->bo; } static void view_init_for_texture_gen7(const struct ilo_dev_info *dev, const struct ilo_texture *tex, enum pipe_format format, unsigned first_level, unsigned num_levels, unsigned first_layer, unsigned num_layers, bool is_rt, struct ilo_view_surface *surf) { int surface_type, surface_format; int width, height, depth, pitch, lod; uint32_t *dw; ILO_DEV_ASSERT(dev, 7, 7.5); surface_type = ilo_gpe_gen6_translate_texture(tex->base.target); assert(surface_type != GEN6_SURFTYPE_BUFFER); if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8) format = PIPE_FORMAT_Z32_FLOAT; if (is_rt) surface_format = ilo_translate_render_format(dev, format); else surface_format = ilo_translate_texture_format(dev, format); assert(surface_format >= 0); width = tex->layout.width0; height = tex->layout.height0; depth = (tex->base.target == PIPE_TEXTURE_3D) ? tex->base.depth0 : num_layers; pitch = tex->layout.bo_stride; if (surface_type == GEN6_SURFTYPE_CUBE) { /* * From the Ivy Bridge PRM, volume 4 part 1, page 70: * * "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of * this field is [0,340], indicating the number of cube array * elements (equal to the number of underlying 2D array elements * divided by 6). For other surfaces, this field must be zero." * * When is_rt is true, we treat the texture as a 2D one to avoid the * restriction. */ if (is_rt) { surface_type = GEN6_SURFTYPE_2D; } else { assert(num_layers % 6 == 0); depth = num_layers / 6; } } /* sanity check the size */ assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1); assert(first_layer < 2048 && num_layers <= 2048); switch (surface_type) { case GEN6_SURFTYPE_1D: assert(width <= 16384 && height == 1 && depth <= 2048); break; case GEN6_SURFTYPE_2D: assert(width <= 16384 && height <= 16384 && depth <= 2048); break; case GEN6_SURFTYPE_3D: assert(width <= 2048 && height <= 2048 && depth <= 2048); if (!is_rt) assert(first_layer == 0); break; case GEN6_SURFTYPE_CUBE: assert(width <= 16384 && height <= 16384 && depth <= 86); assert(width == height); if (is_rt) assert(first_layer == 0); break; default: assert(!"unexpected surface type"); break; } if (is_rt) { assert(num_levels == 1); lod = first_level; } else { lod = num_levels - 1; } /* * From the Ivy Bridge PRM, volume 4 part 1, page 68: * * "The Base Address for linear render target surfaces and surfaces * accessed with the typed surface read/write data port messages must * be element-size aligned, for non-YUV surface formats, or a multiple * of 2 element-sizes for YUV surface formats. Other linear surfaces * have no alignment requirements (byte alignment is sufficient)." * * From the Ivy Bridge PRM, volume 4 part 1, page 70: * * "For linear render target surfaces and surfaces accessed with the * typed data port messages, the pitch must be a multiple of the * element size for non-YUV surface formats. Pitch must be a multiple * of 2 * element size for YUV surface formats. For linear surfaces * with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple * of 4 bytes.For other linear surfaces, the pitch can be any multiple * of bytes." * * From the Ivy Bridge PRM, volume 4 part 1, page 74: * * "For linear surfaces, this field (X Offset) must be zero." */ if (tex->layout.tiling == INTEL_TILING_NONE) { if (is_rt) { const int elem_size = util_format_get_blocksize(format); assert(pitch % elem_size == 0); } } STATIC_ASSERT(Elements(surf->payload) >= 8); dw = surf->payload; dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT | surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT | ilo_gpe_gen6_translate_winsys_tiling(tex->layout.tiling) << 13; /* * From the Ivy Bridge PRM, volume 4 part 1, page 63: * * "If this field (Surface Array) is enabled, the Surface Type must be * SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is * disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or * SURFTYPE_CUBE, the Depth field must be set to zero." * * For non-3D sampler surfaces, resinfo (the sampler message) always * returns zero for the number of layers when this field is not set. */ if (surface_type != GEN6_SURFTYPE_3D) { if (util_resource_is_array_texture(&tex->base)) dw[0] |= GEN7_SURFACE_DW0_IS_ARRAY; else assert(depth == 1); } assert(tex->layout.align_i == 4 || tex->layout.align_i == 8); assert(tex->layout.align_j == 2 || tex->layout.align_j == 4); if (tex->layout.align_j == 4) dw[0] |= GEN7_SURFACE_DW0_VALIGN_4; if (tex->layout.align_i == 8) dw[0] |= GEN7_SURFACE_DW0_HALIGN_8; if (tex->layout.walk == ILO_LAYOUT_WALK_LOD) dw[0] |= GEN7_SURFACE_DW0_ARYSPC_LOD0; else dw[0] |= GEN7_SURFACE_DW0_ARYSPC_FULL; if (is_rt) dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW; if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) dw[0] |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK; dw[1] = 0; dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) | GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH); dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH) | (pitch - 1); dw[4] = first_layer << 18 | (num_layers - 1) << 7; /* * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL * means the samples are interleaved. The layouts are the same when the * number of samples is 1. */ if (tex->layout.interleaved_samples && tex->base.nr_samples > 1) { assert(!is_rt); dw[4] |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL; } else { dw[4] |= GEN7_SURFACE_DW4_MSFMT_MSS; } if (tex->base.nr_samples > 4) dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_8; else if (tex->base.nr_samples > 2) dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_4; else dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_1; dw[5] = GEN_SHIFT32(first_level, GEN7_SURFACE_DW5_MIN_LOD) | lod; dw[6] = 0; dw[7] = 0; if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { dw[7] |= GEN_SHIFT32(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) | GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) | GEN_SHIFT32(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) | GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A); } /* do not increment reference count */ surf->bo = tex->bo; } void ilo_gpe_init_view_surface_null(const struct ilo_dev_info *dev, unsigned width, unsigned height, unsigned depth, unsigned level, struct ilo_view_surface *surf) { if (ilo_dev_gen(dev) >= ILO_GEN(7)) { view_init_null_gen7(dev, width, height, depth, level, surf); } else { view_init_null_gen6(dev, width, height, depth, level, surf); } } void ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev_info *dev, const struct ilo_buffer *buf, unsigned offset, unsigned size, unsigned struct_size, enum pipe_format elem_format, bool is_rt, bool render_cache_rw, struct ilo_view_surface *surf) { if (ilo_dev_gen(dev) >= ILO_GEN(7)) { view_init_for_buffer_gen7(dev, buf, offset, size, struct_size, elem_format, is_rt, render_cache_rw, surf); } else { view_init_for_buffer_gen6(dev, buf, offset, size, struct_size, elem_format, is_rt, render_cache_rw, surf); } } void ilo_gpe_init_view_surface_for_texture(const struct ilo_dev_info *dev, const struct ilo_texture *tex, enum pipe_format format, unsigned first_level, unsigned num_levels, unsigned first_layer, unsigned num_layers, bool is_rt, struct ilo_view_surface *surf) { if (ilo_dev_gen(dev) >= ILO_GEN(7)) { view_init_for_texture_gen7(dev, tex, format, first_level, num_levels, first_layer, num_layers, is_rt, surf); } else { view_init_for_texture_gen6(dev, tex, format, first_level, num_levels, first_layer, num_layers, is_rt, surf); } } static void sampler_init_border_color_gen6(const struct ilo_dev_info *dev, const union pipe_color_union *color, uint32_t *dw, int num_dwords) { float rgba[4] = { color->f[0], color->f[1], color->f[2], color->f[3], }; ILO_DEV_ASSERT(dev, 6, 6); assert(num_dwords >= 12); /* * This state is not documented in the Sandy Bridge PRM, but in the * Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1. */ /* IEEE_FP */ dw[1] = fui(rgba[0]); dw[2] = fui(rgba[1]); dw[3] = fui(rgba[2]); dw[4] = fui(rgba[3]); /* FLOAT_16 */ dw[5] = util_float_to_half(rgba[0]) | util_float_to_half(rgba[1]) << 16; dw[6] = util_float_to_half(rgba[2]) | util_float_to_half(rgba[3]) << 16; /* clamp to [-1.0f, 1.0f] */ rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f); rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f); rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f); rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f); /* SNORM16 */ dw[9] = (int16_t) util_iround(rgba[0] * 32767.0f) | (int16_t) util_iround(rgba[1] * 32767.0f) << 16; dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) | (int16_t) util_iround(rgba[3] * 32767.0f) << 16; /* SNORM8 */ dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) | (int8_t) util_iround(rgba[1] * 127.0f) << 8 | (int8_t) util_iround(rgba[2] * 127.0f) << 16 | (int8_t) util_iround(rgba[3] * 127.0f) << 24; /* clamp to [0.0f, 1.0f] */ rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f); rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f); rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f); rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f); /* UNORM8 */ dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) | (uint8_t) util_iround(rgba[1] * 255.0f) << 8 | (uint8_t) util_iround(rgba[2] * 255.0f) << 16 | (uint8_t) util_iround(rgba[3] * 255.0f) << 24; /* UNORM16 */ dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) | (uint16_t) util_iround(rgba[1] * 65535.0f) << 16; dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) | (uint16_t) util_iround(rgba[3] * 65535.0f) << 16; } /** * Translate a pipe texture mipfilter to the matching hardware mipfilter. */ static int gen6_translate_tex_mipfilter(unsigned filter) { switch (filter) { case PIPE_TEX_MIPFILTER_NEAREST: return GEN6_MIPFILTER_NEAREST; case PIPE_TEX_MIPFILTER_LINEAR: return GEN6_MIPFILTER_LINEAR; case PIPE_TEX_MIPFILTER_NONE: return GEN6_MIPFILTER_NONE; default: assert(!"unknown mipfilter"); return GEN6_MIPFILTER_NONE; } } /** * Translate a pipe texture filter to the matching hardware mapfilter. */ static int gen6_translate_tex_filter(unsigned filter) { switch (filter) { case PIPE_TEX_FILTER_NEAREST: return GEN6_MAPFILTER_NEAREST; case PIPE_TEX_FILTER_LINEAR: return GEN6_MAPFILTER_LINEAR; default: assert(!"unknown sampler filter"); return GEN6_MAPFILTER_NEAREST; } } /** * Translate a pipe texture coordinate wrapping mode to the matching hardware * wrapping mode. */ static int gen6_translate_tex_wrap(unsigned wrap, bool clamp_to_edge) { /* clamp to edge or border? */ if (wrap == PIPE_TEX_WRAP_CLAMP) { wrap = (clamp_to_edge) ? PIPE_TEX_WRAP_CLAMP_TO_EDGE : PIPE_TEX_WRAP_CLAMP_TO_BORDER; } switch (wrap) { case PIPE_TEX_WRAP_REPEAT: return GEN6_TEXCOORDMODE_WRAP; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return GEN6_TEXCOORDMODE_CLAMP; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return GEN6_TEXCOORDMODE_CLAMP_BORDER; case PIPE_TEX_WRAP_MIRROR_REPEAT: return GEN6_TEXCOORDMODE_MIRROR; case PIPE_TEX_WRAP_CLAMP: case PIPE_TEX_WRAP_MIRROR_CLAMP: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: default: assert(!"unknown sampler wrap mode"); return GEN6_TEXCOORDMODE_WRAP; } } /** * Translate a pipe shadow compare function to the matching hardware shadow * function. */ static int gen6_translate_shadow_func(unsigned func) { /* * For PIPE_FUNC_x, the reference value is on the left-hand side of the * comparison, and 1.0 is returned when the comparison is true. * * For GEN6_COMPAREFUNCTION_x, the reference value is on the right-hand side of * the comparison, and 0.0 is returned when the comparison is true. */ switch (func) { case PIPE_FUNC_NEVER: return GEN6_COMPAREFUNCTION_ALWAYS; case PIPE_FUNC_LESS: return GEN6_COMPAREFUNCTION_LEQUAL; case PIPE_FUNC_EQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL; case PIPE_FUNC_LEQUAL: return GEN6_COMPAREFUNCTION_LESS; case PIPE_FUNC_GREATER: return GEN6_COMPAREFUNCTION_GEQUAL; case PIPE_FUNC_NOTEQUAL: return GEN6_COMPAREFUNCTION_EQUAL; case PIPE_FUNC_GEQUAL: return GEN6_COMPAREFUNCTION_GREATER; case PIPE_FUNC_ALWAYS: return GEN6_COMPAREFUNCTION_NEVER; default: assert(!"unknown shadow compare function"); return GEN6_COMPAREFUNCTION_NEVER; } } void ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev, const struct pipe_sampler_state *state, struct ilo_sampler_cso *sampler) { int mip_filter, min_filter, mag_filter, max_aniso; int lod_bias, max_lod, min_lod; int wrap_s, wrap_t, wrap_r, wrap_cube; bool clamp_is_to_edge; uint32_t dw0, dw1, dw3; ILO_DEV_ASSERT(dev, 6, 7.5); memset(sampler, 0, sizeof(*sampler)); mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter); min_filter = gen6_translate_tex_filter(state->min_img_filter); mag_filter = gen6_translate_tex_filter(state->mag_img_filter); sampler->anisotropic = state->max_anisotropy; if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16) max_aniso = state->max_anisotropy / 2 - 1; else if (state->max_anisotropy > 16) max_aniso = GEN6_ANISORATIO_16; else max_aniso = GEN6_ANISORATIO_2; /* * * Here is how the hardware calculate per-pixel LOD, from my reading of the * PRMs: * * 1) LOD is set to log2(ratio of texels to pixels) if not specified in * other ways. The number of texels is measured using level * SurfMinLod. * 2) Bias is added to LOD. * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is * compared with Base to determine whether magnification or * minification is needed. (if preclamp is disabled, LOD is compared * with Base before clamping) * 4) If magnification is needed, or no mipmapping is requested, LOD is * set to floor(MinLod). * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD. * * With Gallium interface, Base is always zero and * pipe_sampler_view::u.tex.first_level specifies SurfMinLod. */ if (ilo_dev_gen(dev) >= ILO_GEN(7)) { const float scale = 256.0f; /* [-16.0, 16.0) in S4.8 */ lod_bias = (int) (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale); lod_bias &= 0x1fff; /* [0.0, 14.0] in U4.8 */ max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale); min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale); } else { const float scale = 64.0f; /* [-16.0, 16.0) in S4.6 */ lod_bias = (int) (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale); lod_bias &= 0x7ff; /* [0.0, 13.0] in U4.6 */ max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale); min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale); } /* * We want LOD to be clamped to determine magnification/minification, and * get set to zero when it is magnification or when mipmapping is disabled. * The hardware would set LOD to floor(MinLod) and that is a problem when * MinLod is greater than or equal to 1.0f. * * With Base being zero, it is always minification when MinLod is non-zero. * To achieve our goal, we just need to set MinLod to zero and set * MagFilter to MinFilter when mipmapping is disabled. */ if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) { min_lod = 0; mag_filter = min_filter; } /* * For nearest filtering, PIPE_TEX_WRAP_CLAMP means * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering, PIPE_TEX_WRAP_CLAMP * means PIPE_TEX_WRAP_CLAMP_TO_BORDER while additionally clamping the * texture coordinates to [0.0, 1.0]. * * The clamping will be taken care of in the shaders. There are two * filters here, but let the minification one has a say. */ clamp_is_to_edge = (state->min_img_filter == PIPE_TEX_FILTER_NEAREST); if (!clamp_is_to_edge) { sampler->saturate_s = (state->wrap_s == PIPE_TEX_WRAP_CLAMP); sampler->saturate_t = (state->wrap_t == PIPE_TEX_WRAP_CLAMP); sampler->saturate_r = (state->wrap_r == PIPE_TEX_WRAP_CLAMP); } /* determine wrap s/t/r */ wrap_s = gen6_translate_tex_wrap(state->wrap_s, clamp_is_to_edge); wrap_t = gen6_translate_tex_wrap(state->wrap_t, clamp_is_to_edge); wrap_r = gen6_translate_tex_wrap(state->wrap_r, clamp_is_to_edge); /* * From the Sandy Bridge PRM, volume 4 part 1, page 107: * * "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP * and TEXCOORDMODE_CUBE settings are valid, and each TC component * must have the same Address Control mode." * * From the Ivy Bridge PRM, volume 4 part 1, page 96: * * "This field (Cube Surface Control Mode) must be set to * CUBECTRLMODE_PROGRAMMED" * * Therefore, we cannot use "Cube Surface Control Mode" for semless cube * map filtering. */ if (state->seamless_cube_map && (state->min_img_filter != PIPE_TEX_FILTER_NEAREST || state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) { wrap_cube = GEN6_TEXCOORDMODE_CUBE; } else { wrap_cube = GEN6_TEXCOORDMODE_CLAMP; } if (!state->normalized_coords) { /* * From the Ivy Bridge PRM, volume 4 part 1, page 98: * * "The following state must be set as indicated if this field * (Non-normalized Coordinate Enable) is enabled: * * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP, * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER. * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D. * - Mag Mode Filter must be MAPFILTER_NEAREST or * MAPFILTER_LINEAR. * - Min Mode Filter must be MAPFILTER_NEAREST or * MAPFILTER_LINEAR. * - Mip Mode Filter must be MIPFILTER_NONE. * - Min LOD must be 0. * - Max LOD must be 0. * - MIP Count must be 0. * - Surface Min LOD must be 0. * - Texture LOD Bias must be 0." */ assert(wrap_s == GEN6_TEXCOORDMODE_CLAMP || wrap_s == GEN6_TEXCOORDMODE_CLAMP_BORDER); assert(wrap_t == GEN6_TEXCOORDMODE_CLAMP || wrap_t == GEN6_TEXCOORDMODE_CLAMP_BORDER); assert(wrap_r == GEN6_TEXCOORDMODE_CLAMP || wrap_r == GEN6_TEXCOORDMODE_CLAMP_BORDER); assert(mag_filter == GEN6_MAPFILTER_NEAREST || mag_filter == GEN6_MAPFILTER_LINEAR); assert(min_filter == GEN6_MAPFILTER_NEAREST || min_filter == GEN6_MAPFILTER_LINEAR); /* work around a bug in util_blitter */ mip_filter = GEN6_MIPFILTER_NONE; assert(mip_filter == GEN6_MIPFILTER_NONE); } if (ilo_dev_gen(dev) >= ILO_GEN(7)) { dw0 = 1 << 28 | mip_filter << 20 | lod_bias << 1; sampler->dw_filter = mag_filter << 17 | min_filter << 14; sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 | GEN6_MAPFILTER_ANISOTROPIC << 14 | 1; dw1 = min_lod << 20 | max_lod << 8; if (state->compare_mode != PIPE_TEX_COMPARE_NONE) dw1 |= gen6_translate_shadow_func(state->compare_func) << 1; dw3 = max_aniso << 19; /* round the coordinates for linear filtering */ if (min_filter != GEN6_MAPFILTER_NEAREST) { dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND | GEN6_SAMPLER_DW3_V_MIN_ROUND | GEN6_SAMPLER_DW3_R_MIN_ROUND); } if (mag_filter != GEN6_MAPFILTER_NEAREST) { dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND | GEN6_SAMPLER_DW3_V_MAG_ROUND | GEN6_SAMPLER_DW3_R_MAG_ROUND); } if (!state->normalized_coords) dw3 |= 1 << 10; sampler->dw_wrap = wrap_s << 6 | wrap_t << 3 | wrap_r; /* * As noted in the classic i965 driver, the HW may still reference * wrap_t and wrap_r for 1D textures. We need to set them to a safe * mode */ sampler->dw_wrap_1d = wrap_s << 6 | GEN6_TEXCOORDMODE_WRAP << 3 | GEN6_TEXCOORDMODE_WRAP; sampler->dw_wrap_cube = wrap_cube << 6 | wrap_cube << 3 | wrap_cube; STATIC_ASSERT(Elements(sampler->payload) >= 7); sampler->payload[0] = dw0; sampler->payload[1] = dw1; sampler->payload[2] = dw3; memcpy(&sampler->payload[3], state->border_color.ui, sizeof(state->border_color.ui)); } else { dw0 = 1 << 28 | mip_filter << 20 | lod_bias << 3; if (state->compare_mode != PIPE_TEX_COMPARE_NONE) dw0 |= gen6_translate_shadow_func(state->compare_func); sampler->dw_filter = (min_filter != mag_filter) << 27 | mag_filter << 17 | min_filter << 14; sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 | GEN6_MAPFILTER_ANISOTROPIC << 14; dw1 = min_lod << 22 | max_lod << 12; sampler->dw_wrap = wrap_s << 6 | wrap_t << 3 | wrap_r; sampler->dw_wrap_1d = wrap_s << 6 | GEN6_TEXCOORDMODE_WRAP << 3 | GEN6_TEXCOORDMODE_WRAP; sampler->dw_wrap_cube = wrap_cube << 6 | wrap_cube << 3 | wrap_cube; dw3 = max_aniso << 19; /* round the coordinates for linear filtering */ if (min_filter != GEN6_MAPFILTER_NEAREST) { dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND | GEN6_SAMPLER_DW3_V_MIN_ROUND | GEN6_SAMPLER_DW3_R_MIN_ROUND); } if (mag_filter != GEN6_MAPFILTER_NEAREST) { dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND | GEN6_SAMPLER_DW3_V_MAG_ROUND | GEN6_SAMPLER_DW3_R_MAG_ROUND); } if (!state->normalized_coords) dw3 |= 1; STATIC_ASSERT(Elements(sampler->payload) >= 15); sampler->payload[0] = dw0; sampler->payload[1] = dw1; sampler->payload[2] = dw3; sampler_init_border_color_gen6(dev, &state->border_color, &sampler->payload[3], 12); } }