/* * Mesa 3-D graphics library * * Copyright (C) 2014 LunarG, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Authors: * Chia-I Wu */ #include "genhw/genhw.h" #include "core/ilo_builder_media.h" #include "core/ilo_builder_mi.h" #include "core/ilo_builder_render.h" #include "ilo_shader.h" #include "ilo_state.h" #include "ilo_render_gen.h" struct gen7_l3_config { int slm; int urb; int rest; int dc; int ro; int is; int c; int t; }; /* * From the Ivy Bridge PRM, volume 1 part 7, page 10: * * "Normal L3/URB mode (non-SLM mode), uses all 4 banks of L3 equally to * distribute cycles. The following allocation is a suggested programming * model. Note all numbers below are given in KBytes." * * From the Haswell PRM, volume 7, page 662: * * "The configuration for {SLM = 0,URB = 224,DC = 32,RO = 256,IS = 0,C = * 0,T =0, SUM 512} was validated as a later supported configuration and * can be utilized if desired." */ static const struct gen7_l3_config gen7_l3_non_slm_configs[] = { /* SLM URB Rest DC RO I/S C T */ [0] = { 0, 256, 0, 0, 256, 0, 0, 0, }, [1] = { 0, 256, 0, 128, 128, 0, 0, 0, }, [2] = { 0, 256, 0, 32, 0, 64, 32, 128, }, [3] = { 0, 224, 0, 64, 0, 64, 32, 128, }, [4] = { 0, 224, 0, 128, 0, 64, 32, 64, }, [5] = { 0, 224, 0, 64, 0, 128, 32, 64, }, [6] = { 0, 224, 0, 0, 0, 128, 32, 128, }, [7] = { 0, 256, 0, 0, 0, 128, 0, 128, }, [8] = { 0, 224, 0, 32, 256, 0, 0, 0, }, }; /* * From the Ivy Bridge PRM, volume 1 part 7, page 11: * * "With the existence of Shared Local Memory, a 64KB chunk from each of * the 2 L3 banks will be reserved for SLM usage. The remaining cache * space is divided between the remaining clients. SLM allocation is done * via reducing the number of ways on the two banks from 64 to 32." * * From the Haswell PRM, volume 7, page 662: * * "The configuration for {SLM = 128,URB = 128,DC = 0,RO = 256,IS = 0,C = * 0,T =0, SUM 512} was validated as a later supported configuration and * can be utilized if desired. For this configuration, global atomics * must be programmed to be in GTI." */ static const struct gen7_l3_config gen7_l3_slm_configs[] = { /* SLM URB Rest DC RO I/S C T */ [0] = { 128, 128, 0, 128, 128, 0, 0, 0, }, [1] = { 128, 128, 0, 64, 0, 64, 64, 64, }, [2] = { 128, 128, 0, 32, 0, 64, 32, 128, }, [3] = { 128, 128, 0, 32, 0, 128, 32, 64, }, [4] = { 128, 128, 0, 0, 256, 0, 0, 0, }, }; static void gen7_launch_grid_l3(struct ilo_render *r, bool use_slm) { uint32_t l3sqcreg1, l3cntlreg2, l3cntlreg3; const struct gen7_l3_config *conf; /* * This function mostly follows what beignet does. I do not know why, for * example, CON4DCUNC should be reset. I do not know if it should be set * again after launch_grid(). */ ILO_DEV_ASSERT(r->dev, 7, 7.5); if (use_slm) conf = &gen7_l3_slm_configs[1]; else conf = &gen7_l3_non_slm_configs[4]; /* unset GEN7_REG_L3SQCREG1_CON4DCUNC (without readback first) */ if (ilo_dev_gen(r->dev) >= ILO_GEN(7.5)) { l3sqcreg1 = GEN75_REG_L3SQCREG1_SQGPCI_24 | GEN75_REG_L3SQCREG1_SQHPCI_8; } else { l3sqcreg1 = GEN7_REG_L3SQCREG1_SQGHPCI_18_6; } l3cntlreg2 = (conf->dc / 8) << GEN7_REG_L3CNTLREG2_DCWASS__SHIFT | (conf->ro / 8) << GEN7_REG_L3CNTLREG2_RDOCPL__SHIFT | (conf->urb / 8) << GEN7_REG_L3CNTLREG2_URBALL__SHIFT; l3cntlreg3 = (conf->t / 8) << GEN7_REG_L3CNTLREG3_TXWYALL__SHIFT | (conf->c / 8) << GEN7_REG_L3CNTLREG3_CTWYALL__SHIFT | (conf->is / 8) << GEN7_REG_L3CNTLREG3_ISWYALL__SHIFT; if (conf->slm) { /* * From the Ivy Bridge PRM, volume 1 part 7, page 11: * * "Note that URB needs to be set as low b/w client in SLM mode, * else the hash will fail. This is a required s/w model." */ l3cntlreg2 |= GEN7_REG_L3CNTLREG2_URBSLMB | GEN7_REG_L3CNTLREG2_SLMMENB; } gen6_MI_LOAD_REGISTER_IMM(r->builder, GEN7_REG_L3SQCREG1, l3sqcreg1); gen6_MI_LOAD_REGISTER_IMM(r->builder, GEN7_REG_L3CNTLREG2, l3cntlreg2); gen6_MI_LOAD_REGISTER_IMM(r->builder, GEN7_REG_L3CNTLREG3, l3cntlreg3); } int ilo_render_get_launch_grid_commands_len(const struct ilo_render *render, const struct ilo_state_vector *vec) { static int len; ILO_DEV_ASSERT(render->dev, 7, 7.5); if (!len) { len += GEN6_PIPELINE_SELECT__SIZE + GEN6_STATE_BASE_ADDRESS__SIZE + GEN6_MEDIA_VFE_STATE__SIZE + GEN6_MEDIA_CURBE_LOAD__SIZE + GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD__SIZE + GEN6_MEDIA_STATE_FLUSH__SIZE; len += ilo_render_get_flush_len(render) * 3; if (ilo_dev_gen(render->dev) >= ILO_GEN(7)) { len += GEN6_MI_LOAD_REGISTER_IMM__SIZE * 3 * 2; len += GEN7_GPGPU_WALKER__SIZE; } } return len; } void ilo_render_emit_launch_grid_commands(struct ilo_render *render, const struct ilo_state_vector *vec, const struct ilo_render_launch_grid_session *session) { const unsigned batch_used = ilo_builder_batch_used(render->builder); const uint32_t pcb = render->state.cs.PUSH_CONSTANT_BUFFER; const int pcb_size = render->state.cs.PUSH_CONSTANT_BUFFER_size; int simd_size; bool use_slm; ILO_DEV_ASSERT(render->dev, 7, 7.5); simd_size = ilo_shader_get_kernel_param(vec->cs, ILO_KERNEL_CS_SIMD_SIZE); use_slm = ilo_shader_get_kernel_param(vec->cs, ILO_KERNEL_CS_LOCAL_SIZE); ilo_render_emit_flush(render); if (ilo_dev_gen(render->dev) >= ILO_GEN(7)) { gen7_launch_grid_l3(render, use_slm); ilo_render_emit_flush(render); gen6_PIPELINE_SELECT(render->builder, GEN7_PIPELINE_SELECT_DW0_SELECT_GPGPU); } else { gen6_PIPELINE_SELECT(render->builder, GEN6_PIPELINE_SELECT_DW0_SELECT_MEDIA); } gen6_state_base_address(render->builder, true); gen6_MEDIA_VFE_STATE(render->builder, &session->compute); if (pcb_size) gen6_MEDIA_CURBE_LOAD(render->builder, pcb, pcb_size); gen6_MEDIA_INTERFACE_DESCRIPTOR_LOAD(render->builder, session->idrt, session->idrt_size); gen7_GPGPU_WALKER(render->builder, session->thread_group_offset, session->thread_group_dim, session->thread_group_size, simd_size); gen6_MEDIA_STATE_FLUSH(render->builder); if (ilo_dev_gen(render->dev) >= ILO_GEN(7) && use_slm) { ilo_render_emit_flush(render); gen7_launch_grid_l3(render, false); } assert(ilo_builder_batch_used(render->builder) <= batch_used + ilo_render_get_launch_grid_commands_len(render, vec)); }