From e198983c6119aa93b089d7883a9ec400ba52e7bb Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 8 Aug 2016 21:32:30 -0700 Subject: i965/blorp: Use genxml for gen7 state setup Signed-off-by: Jason Ekstrand Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/Makefile.am | 10 +- src/mesa/drivers/dri/i965/Makefile.sources | 7 +- src/mesa/drivers/dri/i965/blorp.c | 5 +- src/mesa/drivers/dri/i965/blorp_priv.h | 4 + src/mesa/drivers/dri/i965/gen7_blorp.c | 540 ---------------------------- src/mesa/drivers/dri/i965/genX_blorp_exec.c | 298 +++++++++++++-- 6 files changed, 299 insertions(+), 565 deletions(-) delete mode 100644 src/mesa/drivers/dri/i965/gen7_blorp.c diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index 02e46ad..ad4e365 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -49,11 +49,19 @@ brw_nir_trig_workarounds.c: brw_nir_trig_workarounds.py $(top_srcdir)/src/compil $(AM_V_GEN) PYTHONPATH=$(top_srcdir)/src/compiler/nir $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_nir_trig_workarounds.py > $@ || ($(RM) $@; false) I965_PERGEN_LIBS = \ - libi965_gen6.la + libi965_gen6.la \ + libi965_gen7.la \ + libi965_gen75.la libi965_gen6_la_SOURCES = $(i965_gen6_FILES) libi965_gen6_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=60 +libi965_gen7_la_SOURCES = $(i965_gen7_FILES) +libi965_gen7_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=70 + +libi965_gen75_la_SOURCES = $(i965_gen75_FILES) +libi965_gen75_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=75 + noinst_LTLIBRARIES = \ libi965_dri.la \ libi965_compiler.la \ diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 11dc08c..130f2b9 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -192,7 +192,6 @@ i965_FILES = \ gen6_viewport_state.c \ gen6_vs_state.c \ gen6_wm_state.c \ - gen7_blorp.c \ gen7_cs_state.c \ gen7_ds_state.c \ 
gen7_gs_state.c \ @@ -263,3 +262,9 @@ i965_FILES = \ i965_gen6_FILES = \ genX_blorp_exec.c + +i965_gen7_FILES = \ + genX_blorp_exec.c + +i965_gen75_FILES = \ + genX_blorp_exec.c diff --git a/src/mesa/drivers/dri/i965/blorp.c b/src/mesa/drivers/dri/i965/blorp.c index 9e53753..87cf2c9 100644 --- a/src/mesa/drivers/dri/i965/blorp.c +++ b/src/mesa/drivers/dri/i965/blorp.c @@ -315,7 +315,10 @@ retry: gen6_blorp_exec(brw, params); break; case 7: - gen7_blorp_exec(brw, params); + if (brw->is_haswell) + gen75_blorp_exec(brw, params); + else + gen7_blorp_exec(brw, params); break; case 8: case 9: diff --git a/src/mesa/drivers/dri/i965/blorp_priv.h b/src/mesa/drivers/dri/i965/blorp_priv.h index 730665e..ce6aaa7 100644 --- a/src/mesa/drivers/dri/i965/blorp_priv.h +++ b/src/mesa/drivers/dri/i965/blorp_priv.h @@ -197,6 +197,10 @@ gen7_blorp_exec(struct brw_context *brw, const struct brw_blorp_params *params); void +gen75_blorp_exec(struct brw_context *brw, + const struct brw_blorp_params *params); + +void gen8_blorp_exec(struct brw_context *brw, const struct brw_blorp_params *params); struct brw_blorp_blit_prog_key diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.c b/src/mesa/drivers/dri/i965/gen7_blorp.c deleted file mode 100644 index 1286b55..0000000 --- a/src/mesa/drivers/dri/i965/gen7_blorp.c +++ /dev/null @@ -1,540 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of 
the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include - -#include "intel_batchbuffer.h" -#include "intel_mipmap_tree.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" - -#include "blorp_priv.h" - - -/* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS - * - * The offset is relative to CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. - */ -static void -gen7_blorp_emit_depth_stencil_state_pointers(struct brw_context *brw, - uint32_t depthstencil_offset) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_DEPTH_STENCIL_STATE_POINTERS << 16 | (2 - 2)); - OUT_BATCH(depthstencil_offset | 1); - ADVANCE_BATCH(); -} - - -/* Hardware seems to try to fetch the constants even though the corresponding - * stage gets disabled. Therefore make sure the settings for the constant - * buffer are valid. - */ -static void -gen7_blorp_disable_constant_state(struct brw_context *brw, - unsigned opcode) -{ - BEGIN_BATCH(7); - OUT_BATCH(opcode << 16 | (7 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -/* 3DSTATE_VS - * - * Disable vertex shader. - */ -static void -gen7_blorp_emit_vs_disable(struct brw_context *brw) -{ - BEGIN_BATCH(6); - OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - -/* 3DSTATE_HS - * - * Disable the hull shader. 
- */ -static void -gen7_blorp_emit_hs_disable(struct brw_context *brw) -{ - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - -/* 3DSTATE_DS - * - * Disable the domain shader. - */ -static void -gen7_blorp_emit_ds_disable(struct brw_context *brw) -{ - BEGIN_BATCH(6); - OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -/* 3DSTATE_GS - * - * Disable the geometry shader. - */ -static void -gen7_blorp_emit_gs_disable(struct brw_context *brw) -{ - /** - * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > - * Geometry > Geometry Shader > State: - * - * "Note: Because of corruption in IVB:GT2, software needs to flush the - * whole fixed function pipeline when the GS enable changes value in - * the 3DSTATE_GS." - * - * The hardware architects have clarified that in this context "flush the - * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS - * Stall" bit set. - */ - if (brw->gen < 8 && !brw->is_haswell && brw->gt == 2 && brw->gs.enabled) - gen7_emit_cs_stall_flush(brw); - - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - brw->gs.enabled = false; -} - -/* 3DSTATE_STREAMOUT - * - * Disable streamout. 
- */ -static void -gen7_blorp_emit_streamout_disable(struct brw_context *brw) -{ - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - -static void -gen7_blorp_emit_sf_config(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - /* 3DSTATE_SF - * - * Disable ViewportTransformEnable (dw1.1) - * - * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D - * Primitives Overview": - * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the - * use of screen- space coordinates). - * - * A solid rectangle must be rendered, so set FrontFaceFillMode (dw1.6:5) - * and BackFaceFillMode (dw1.4:3) to SOLID(0). - * - * From the Sandy Bridge PRM, Volume 2, Part 1, Section - * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode: - * SOLID: Any triangle or rectangle object found to be front-facing - * is rendered as a solid object. This setting is required when - * (rendering rectangle (RECTLIST) objects. - */ - { - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2)); - OUT_BATCH(params->depth_format << - GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT); - OUT_BATCH(params->dst.surf.samples > 1 ? GEN6_SF_MSRAST_ON_PATTERN : 0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_SBE */ - { - const unsigned num_varyings = - params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0; - const unsigned urb_read_length = - brw_blorp_get_urb_length(params->wm_prog_data); - - BEGIN_BATCH(14); - OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2)); - - /* There is no need for swizzling (GEN7_SBE_SWIZZLE_ENABLE). All the - * vertex data coming from vertex fetcher is taken as unmodified - * (i.e., passed through). Vertex shader state is disabled and vertex - * fetcher builds complete vertex entries including VUE header. - * This is for unknown reason really needed to be disabled when more - * than one vec4 worth of vertex attributes are needed. 
- */ - OUT_BATCH(num_varyings << GEN7_SBE_NUM_OUTPUTS_SHIFT | - urb_read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | - BRW_SF_URB_ENTRY_READ_OFFSET << - GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT); - for (int i = 0; i < 9; ++i) - OUT_BATCH(0); - OUT_BATCH(params->wm_prog_data ? params->wm_prog_data->flat_inputs : 0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } -} - - -/** - * Disable thread dispatch (dw5.19) and enable the HiZ op. - */ -static void -gen7_blorp_emit_wm_config(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; - uint32_t dw1 = 0, dw2 = 0; - - switch (params->hiz_op) { - case GEN6_HIZ_OP_DEPTH_CLEAR: - dw1 |= GEN7_WM_DEPTH_CLEAR; - break; - case GEN6_HIZ_OP_DEPTH_RESOLVE: - dw1 |= GEN7_WM_DEPTH_RESOLVE; - break; - case GEN6_HIZ_OP_HIZ_RESOLVE: - dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE; - break; - case GEN6_HIZ_OP_NONE: - break; - default: - unreachable("not reached"); - } - dw1 |= 0 << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* No interp */ - - if (params->wm_prog_data) - dw1 |= GEN7_WM_DISPATCH_ENABLE; /* We are rendering */ - - if (params->src.bo) - dw1 |= GEN7_WM_KILL_ENABLE; /* TODO: temporarily smash on */ - - if (params->dst.surf.samples > 1) { - dw1 |= GEN7_WM_MSRAST_ON_PATTERN; - if (prog_data && prog_data->persample_msaa_dispatch) - dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; - else - dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL; - } else { - dw1 |= GEN7_WM_MSRAST_OFF_PIXEL; - dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; - } - - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2)); - OUT_BATCH(dw1); - OUT_BATCH(dw2); - ADVANCE_BATCH(); -} - - -/** - * 3DSTATE_PS - * - * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite - * that, thread dispatch info must still be specified. - * - Maximum Number of Threads (dw4.24:31) must be nonzero, as the - * valid range for this field is [0x3, 0x2f]. 
- * - A dispatch mode must be given; that is, at least one of the - * "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was - * discovered through simulator error messages. - */ -static void -gen7_blorp_emit_ps_config(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; - uint32_t dw2, dw4, dw5, ksp0, ksp2; - const int max_threads_shift = brw->is_haswell ? - HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT; - - dw2 = dw4 = dw5 = ksp0 = ksp2 = 0; - dw4 |= (brw->max_wm_threads - 1) << max_threads_shift; - - if (brw->is_haswell) - dw4 |= SET_FIELD(1, HSW_PS_SAMPLE_MASK); /* 1 sample for now */ - if (params->wm_prog_data) { - dw5 |= prog_data->first_curbe_grf_0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0; - dw5 |= prog_data->first_curbe_grf_2 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2; - - ksp0 = params->wm_prog_kernel; - ksp2 = params->wm_prog_kernel + params->wm_prog_data->ksp_offset_2; - - if (params->wm_prog_data->dispatch_8) - dw4 |= GEN7_PS_8_DISPATCH_ENABLE; - if (params->wm_prog_data->dispatch_16) - dw4 |= GEN7_PS_16_DISPATCH_ENABLE; - if (params->wm_prog_data->num_varying_inputs) - dw4 |= GEN7_PS_ATTRIBUTE_ENABLE; - } else { - /* The hardware gets angry if we don't enable at least one dispatch - * mode, so just enable 16-pixel dispatch if we don't have a program. 
- */ - dw4 |= GEN7_PS_16_DISPATCH_ENABLE; - } - - if (params->src.bo) - dw2 |= 1 << GEN7_PS_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */ - - dw4 |= params->fast_clear_op; - - BEGIN_BATCH(8); - OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); - OUT_BATCH(ksp0); - OUT_BATCH(dw2); - OUT_BATCH(0); - OUT_BATCH(dw4); - OUT_BATCH(dw5); - OUT_BATCH(0); /* kernel 1 pointer */ - OUT_BATCH(ksp2); - ADVANCE_BATCH(); -} - - -static void -gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - const uint8_t mocs = GEN7_MOCS_L3; - uint32_t surftype; - - switch (params->depth.surf.dim) { - case ISL_SURF_DIM_1D: - surftype = BRW_SURFACE_1D; - break; - case ISL_SURF_DIM_2D: - surftype = BRW_SURFACE_2D; - break; - case ISL_SURF_DIM_3D: - surftype = BRW_SURFACE_3D; - break; - } - - /* 3DSTATE_DEPTH_BUFFER */ - { - brw_emit_depth_stall_flushes(brw); - - unsigned depth = MAX2(params->depth.surf.logical_level0_px.depth, - params->depth.surf.logical_level0_px.array_len); - - BEGIN_BATCH(7); - OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); - OUT_BATCH((params->depth.surf.row_pitch - 1) | - params->depth_format << 18 | - 1 << 22 | /* hiz enable */ - 1 << 28 | /* depth write */ - surftype << 29); - OUT_RELOC(params->depth.bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - params->depth.offset); - OUT_BATCH((params->depth.surf.logical_level0_px.width - 1) << 4 | - (params->depth.surf.logical_level0_px.height - 1) << 18 | - params->depth.view.base_level); - OUT_BATCH(((depth - 1) << 21) | - (params->depth.view.base_array_layer << 10) | - mocs); - OUT_BATCH(0); - OUT_BATCH((depth - 1) << 21); - ADVANCE_BATCH(); - } - - /* 3DSTATE_HIER_DEPTH_BUFFER */ - { - BEGIN_BATCH(3); - OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); - OUT_BATCH((mocs << 25) | - (params->depth.aux_surf.row_pitch - 1)); - OUT_RELOC(params->depth.aux_bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - params->depth.aux_offset); - ADVANCE_BATCH(); - } - - 
/* 3DSTATE_STENCIL_BUFFER */ - { - BEGIN_BATCH(3); - OUT_BATCH((GEN7_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } -} - - -static void -gen7_blorp_emit_depth_disable(struct brw_context *brw) -{ - brw_emit_depth_stall_flushes(brw); - - BEGIN_BATCH(7); - OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); - OUT_BATCH(BRW_DEPTHFORMAT_D32_FLOAT << 18 | (BRW_SURFACE_NULL << 29)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - - BEGIN_BATCH(3); - OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - - BEGIN_BATCH(3); - OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - -/** - * \copydoc gen6_blorp_exec() - */ -void -gen7_blorp_exec(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - if (brw->gen >= 8) - return; - - uint32_t cc_blend_state_offset = 0; - uint32_t cc_state_offset = 0; - uint32_t depthstencil_offset; - uint32_t wm_bind_bo_offset = 0; - - brw_upload_state_base_address(brw); - gen6_blorp_emit_vertices(brw, params); - gen7_blorp_emit_urb_config(brw, params); - if (params->wm_prog_data) { - cc_blend_state_offset = gen6_blorp_emit_blend_state(brw, params); - cc_state_offset = gen6_blorp_emit_cc_state(brw); - gen7_blorp_emit_blend_state_pointer(brw, cc_blend_state_offset); - gen7_blorp_emit_cc_state_pointer(brw, cc_state_offset); - } - depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params); - gen7_blorp_emit_depth_stencil_state_pointers(brw, depthstencil_offset); - - gen7_blorp_disable_constant_state(brw, _3DSTATE_CONSTANT_VS); - gen7_blorp_disable_constant_state(brw, _3DSTATE_CONSTANT_HS); - gen7_blorp_disable_constant_state(brw, _3DSTATE_CONSTANT_DS); - gen7_blorp_disable_constant_state(brw, _3DSTATE_CONSTANT_GS); - gen7_blorp_disable_constant_state(brw, _3DSTATE_CONSTANT_PS); - - if 
(brw->use_resource_streamer) - gen7_disable_hw_binding_tables(brw); - if (params->wm_prog_data) { - uint32_t wm_surf_offset_renderbuffer; - uint32_t wm_surf_offset_texture = 0; - - wm_surf_offset_renderbuffer = - brw_blorp_emit_surface_state(brw, &params->dst, - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER, - true /* is_render_target */); - if (params->src.bo) { - wm_surf_offset_texture = - brw_blorp_emit_surface_state(brw, &params->src, - I915_GEM_DOMAIN_SAMPLER, 0, - false /* is_render_target */); - } - wm_bind_bo_offset = - gen6_blorp_emit_binding_table(brw, - wm_surf_offset_renderbuffer, - wm_surf_offset_texture); - gen7_blorp_emit_binding_table_pointers_ps(brw, wm_bind_bo_offset); - } - - if (params->src.bo) { - const uint32_t sampler_offset = - gen6_blorp_emit_sampler_state(brw, BRW_MAPFILTER_LINEAR, 0, true); - gen7_blorp_emit_sampler_state_pointers_ps(brw, sampler_offset); - } - - gen6_emit_3dstate_multisample(brw, params->dst.surf.samples); - gen6_emit_3dstate_sample_mask(brw, - params->dst.surf.samples > 1 ? 
- (1 << params->dst.surf.samples) - 1 : 1); - - gen7_blorp_emit_vs_disable(brw); - gen7_blorp_emit_hs_disable(brw); - gen7_blorp_emit_te_disable(brw); - gen7_blorp_emit_ds_disable(brw); - gen7_blorp_emit_gs_disable(brw); - gen7_blorp_emit_streamout_disable(brw); - gen6_blorp_emit_clip_disable(brw); - gen7_blorp_emit_sf_config(brw, params); - gen7_blorp_emit_wm_config(brw, params); - gen7_blorp_emit_ps_config(brw, params); - gen7_blorp_emit_cc_viewport(brw); - - if (params->depth.bo) - gen7_blorp_emit_depth_stencil_config(brw, params); - else - gen7_blorp_emit_depth_disable(brw); - gen7_blorp_emit_clear_params(brw, params); - gen6_blorp_emit_drawing_rectangle(brw, params); - gen7_blorp_emit_primitive(brw, params); -} diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c index d19e235..1eba713 100644 --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c @@ -106,6 +106,76 @@ __gen_combine_address(struct brw_context *brw, void *location, _dw + 1; /* Array starts at dw[1] */ \ }) +/* Once vertex fetcher has written full VUE entries with complete + * header the space requirement is as follows per vertex (in bytes): + * + * Header Position Program constants + * +--------+------------+-------------------+ + * | 16 | 16 | n x 16 | + * +--------+------------+-------------------+ + * + * where 'n' stands for number of varying inputs expressed as vec4s. + * + * The URB size is in turn expressed in 64 bytes (512 bits). + */ +static inline unsigned +gen7_blorp_get_vs_entry_size(const struct brw_blorp_params *params) +{ + const unsigned num_varyings = + params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0; + const unsigned total_needed = 16 + 16 + num_varyings * 16; + + return DIV_ROUND_UP(total_needed, 64); +} + +/* 3DSTATE_URB + * 3DSTATE_URB_VS + * 3DSTATE_URB_HS + * 3DSTATE_URB_DS + * 3DSTATE_URB_GS + * + * Assign the entire URB to the VS. 
Even though the VS is disabled, URB space + * is still needed because the clipper loads the VUE's from the URB. From + * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE, + * Dword 1.15:0 "VS Number of URB Entries": + * This field is always used (even if VS Function Enable is DISABLED). + * + * The warning below appears in the PRM (Section 3DSTATE_URB), but we can + * safely ignore it because this batch contains only one draw call. + * Because of URB corruption caused by allocating a previous GS unit + * URB entry to the VS unit, software is required to send a “GS NULL + * Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0) + * plus a dummy DRAW call before any case where VS will be taking over + * GS URB space. + * + * If the 3DSTATE_URB_VS is emitted, then the others must be also. + * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS: + * + * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be + * programmed in order for the programming of this state to be + * valid. 
+ */ +static void +emit_urb_config(struct brw_context *brw, + const struct brw_blorp_params *params) +{ +#if GEN_GEN >= 7 + const unsigned vs_entry_size = gen7_blorp_get_vs_entry_size(params); + + if (!(brw->ctx.NewDriverState & (BRW_NEW_CONTEXT | BRW_NEW_URB_SIZE)) && + brw->urb.vsize >= vs_entry_size) + return; + + brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE; + + gen7_upload_urb(brw, vs_entry_size, false, false); +#else + blorp_emit(brw, GENX(3DSTATE_URB), urb) { + urb.VSNumberofURBEntries = brw->urb.max_vs_entries; + } +#endif +} + static void blorp_emit_vertex_data(struct brw_context *brw, const struct brw_blorp_params *params, @@ -182,10 +252,20 @@ blorp_emit_vertex_buffers(struct brw_context *brw, unsigned num_buffers = 1; +#if GEN_GEN == 7 + uint32_t mocs = 1 /* GEN7_MOCS_L3 */; +#else + uint32_t mocs = 0; +#endif + uint32_t size; blorp_emit_vertex_data(brw, params, &vb[0].BufferStartingAddress, &size); vb[0].VertexBufferIndex = 0; vb[0].BufferPitch = 2 * sizeof(float); + vb[0].VertexBufferMOCS = mocs; +#if GEN_GEN >= 7 + vb[0].AddressModifyEnable = true; +#endif vb[0].BufferAccessType = VERTEXDATA; vb[0].EndAddress = vb[0].BufferStartingAddress; vb[0].EndAddress.offset += size - 1; @@ -196,6 +276,10 @@ blorp_emit_vertex_buffers(struct brw_context *brw, vb[1].VertexBufferIndex = 1; vb[1].BufferPitch = 0; vb[1].BufferAccessType = INSTANCEDATA; + vb[1].VertexBufferMOCS = mocs; +#if GEN_GEN >= 7 + vb[1].AddressModifyEnable = true; +#endif vb[1].EndAddress = vb[1].BufferStartingAddress; vb[1].EndAddress.offset += size; num_buffers++; @@ -329,6 +413,35 @@ blorp_emit_sf_config(struct brw_context *brw, * is rendered as a solid object. This setting is required when * (rendering rectangle (RECTLIST) objects. */ + +#if GEN_GEN >= 7 + + blorp_emit(brw, GENX(3DSTATE_SF), sf) { + sf.FrontFaceFillMode = FILL_MODE_SOLID; + sf.BackFaceFillMode = FILL_MODE_SOLID; + + sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ? 
+ MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL; + +#if GEN_GEN == 7 + sf.DepthBufferSurfaceFormat = params->depth_format; +#endif + } + + blorp_emit(brw, GENX(3DSTATE_SBE), sbe) { + sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; + if (prog_data) { + sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs; + sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data); + sbe.ConstantInterpolationEnable = prog_data->flat_inputs; + } else { + sbe.NumberofSFOutputAttributes = 0; + sbe.VertexURBEntryReadLength = 1; + } + } + +#else /* GEN_GEN <= 6 */ + blorp_emit(brw, GENX(3DSTATE_SF), sf) { sf.FrontFaceFillMode = FILL_MODE_SOLID; sf.BackFaceFillMode = FILL_MODE_SOLID; @@ -346,10 +459,12 @@ blorp_emit_sf_config(struct brw_context *brw, sf.VertexURBEntryReadLength = 1; } } + +#endif /* GEN_GEN */ } static void -blorp_emit_wm_config(struct brw_context *brw, +blorp_emit_ps_config(struct brw_context *brw, const struct brw_blorp_params *params) { const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; @@ -362,6 +477,86 @@ blorp_emit_wm_config(struct brw_context *brw, * To be safe (and to minimize extraneous code) we go ahead and fully * configure the WM state whether or not there is a WM program. */ + +#if GEN_GEN >= 7 + + blorp_emit(brw, GENX(3DSTATE_WM), wm) { + switch (params->hiz_op) { + case GEN6_HIZ_OP_DEPTH_CLEAR: + wm.DepthBufferClear = true; + break; + case GEN6_HIZ_OP_DEPTH_RESOLVE: + wm.DepthBufferResolveEnable = true; + break; + case GEN6_HIZ_OP_HIZ_RESOLVE: + wm.HierarchicalDepthBufferResolveEnable = true; + break; + case GEN6_HIZ_OP_NONE: + break; + default: + unreachable("not reached"); + } + + if (prog_data) + wm.ThreadDispatchEnable = true; + + if (params->src.bo) + wm.PixelShaderKillPixel = true; + + if (params->dst.surf.samples > 1) { + wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN; + wm.MultisampleDispatchMode = + (prog_data && prog_data->persample_msaa_dispatch) ? 
+ MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL; + } else { + wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; + wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; + } + } + + blorp_emit(brw, GENX(3DSTATE_PS), ps) { + ps.MaximumNumberofThreads = brw->max_wm_threads - 1; + +#if GEN_IS_HASWELL + ps.SampleMask = 1; +#endif + + if (prog_data) { + ps.DispatchGRFStartRegisterforConstantSetupData0 = + prog_data->first_curbe_grf_0; + ps.DispatchGRFStartRegisterforConstantSetupData2 = + prog_data->first_curbe_grf_2; + + ps.KernelStartPointer0 = params->wm_prog_kernel; + ps.KernelStartPointer2 = + params->wm_prog_kernel + prog_data->ksp_offset_2; + + ps._8PixelDispatchEnable = prog_data->dispatch_8; + ps._16PixelDispatchEnable = prog_data->dispatch_16; + + ps.AttributeEnable = prog_data->num_varying_inputs > 0; + } else { + /* Gen7 hardware gets angry if we don't enable at least one dispatch + * mode, so just enable 16-pixel dispatch if we don't have a program. + */ + ps._16PixelDispatchEnable = true; + } + + if (params->src.bo) + ps.SamplerCount = 1; /* Up to 4 samplers */ + + switch (params->fast_clear_op) { + case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */ + ps.RenderTargetResolveEnable = true; + break; + case (1 << 8): /* GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE */ + ps.RenderTargetFastClearEnable = true; + break; + } + } + +#else /* GEN_GEN <= 6 */ + blorp_emit(brw, GENX(3DSTATE_WM), wm) { wm.MaximumNumberofThreads = brw->max_wm_threads - 1; @@ -414,6 +609,8 @@ blorp_emit_wm_config(struct brw_context *brw, wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; } } + +#endif /* GEN_GEN */ } @@ -423,6 +620,12 @@ blorp_emit_depth_stencil_config(struct brw_context *brw, { brw_emit_depth_stall_flushes(brw); +#if GEN_GEN >= 7 + const uint32_t mocs = 1; /* GEN7_MOCS_L3 */ +#else + const uint32_t mocs = 0; +#endif + blorp_emit(brw, GENX(3DSTATE_DEPTH_BUFFER), db) { switch (params->depth.surf.dim) { case ISL_SURF_DIM_1D: @@ -438,12 +641,18 @@ 
blorp_emit_depth_stencil_config(struct brw_context *brw, db.SurfaceFormat = params->depth_format; +#if GEN_GEN >= 7 + db.DepthWriteEnable = true; +#endif + +#if GEN_GEN <= 6 db.TiledSurface = true; db.TileWalk = TILEWALK_YMAJOR; db.MIPMapLayoutMode = MIPLAYOUT_BELOW; + db.SeparateStencilBufferEnable = true; +#endif db.HierarchicalDepthBufferEnable = true; - db.SeparateStencilBufferEnable = true; db.Width = params->depth.surf.logical_level0_px.width - 1; db.Height = params->depth.surf.logical_level0_px.height - 1; @@ -461,6 +670,7 @@ blorp_emit_depth_stencil_config(struct brw_context *brw, .write_domain = I915_GEM_DOMAIN_RENDER, .offset = params->depth.offset, }; + db.DepthBufferMOCS = mocs; } blorp_emit(brw, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz) { @@ -471,6 +681,7 @@ blorp_emit_depth_stencil_config(struct brw_context *brw, .write_domain = I915_GEM_DOMAIN_RENDER, .offset = params->depth.aux_offset, }; + hiz.HierarchicalDepthBufferMOCS = mocs; } blorp_emit(brw, GENX(3DSTATE_STENCIL_BUFFER), sb); @@ -499,6 +710,12 @@ blorp_emit_blend_state(struct brw_context *brw, GENX(BLEND_STATE_length) * 4, 64, &offset); GENX(BLEND_STATE_pack)(NULL, state, &blend); +#if GEN_GEN >= 7 + blorp_emit(brw, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) { + sp.BlendStatePointer = offset; + } +#endif + return offset; } @@ -511,6 +728,12 @@ blorp_emit_color_calc_state(struct brw_context *brw, GENX(COLOR_CALC_STATE_length) * 4, 64, &offset); memset(state, 0, GENX(COLOR_CALC_STATE_length) * 4); +#if GEN_GEN >= 7 + blorp_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), sp) { + sp.ColorCalcStatePointer = offset; + } +#endif + return offset; } @@ -538,6 +761,12 @@ blorp_emit_depth_stencil_state(struct brw_context *brw, &offset); GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, &ds); +#if GEN_GEN >= 7 + blorp_emit(brw, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), sp) { + sp.PointertoDEPTH_STENCIL_STATE = offset; + } +#endif + return offset; } @@ -561,10 +790,16 @@ blorp_emit_surface_states(struct brw_context *brw, 
I915_GEM_DOMAIN_SAMPLER, 0, false); } +#if GEN_GEN >= 7 + blorp_emit(brw, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), bt) { + bt.PointertoPSBindingTable = bind_offset; + } +#else blorp_emit(brw, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) { bt.PSBindingTableChange = true; bt.PointertoPSBindingTable = bind_offset; } +#endif } static void @@ -595,12 +830,18 @@ blorp_emit_sampler_state(struct brw_context *brw, GENX(SAMPLER_STATE_length) * 4, 32, &offset); GENX(SAMPLER_STATE_pack)(NULL, state, &sampler); +#if GEN_GEN >= 7 + blorp_emit(brw, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ssp) { + ssp.PointertoPSSamplerState = offset; + } +#else blorp_emit(brw, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ssp) { ssp.VSSamplerStateChange = true; ssp.GSSamplerStateChange = true; ssp.PSSamplerStateChange = true; ssp.PointertoPSSamplerState = offset; } +#endif } /* 3DSTATE_VIEWPORT_STATE_POINTERS */ @@ -620,10 +861,16 @@ blorp_emit_viewport_state(struct brw_context *brw, .MaximumDepth = 1.0, }); +#if GEN_GEN >= 7 + blorp_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) { + vsp.CCViewportPointer = cc_vp_offset; + } +#else blorp_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) { vsp.CCViewportStateChange = true; vsp.PointertoCC_VIEWPORT = cc_vp_offset; } +#endif } @@ -644,33 +891,17 @@ genX(blorp_exec)(struct brw_context *brw, uint32_t color_calc_state_offset = 0; uint32_t depth_stencil_state_offset; +#if GEN_GEN == 6 /* Emit workaround flushes when we switch from drawing to blorping. */ brw_emit_post_sync_nonzero_flush(brw); +#endif brw_upload_state_base_address(brw); blorp_emit_vertex_buffers(brw, params); blorp_emit_vertex_elements(brw, params); - /* 3DSTATE_URB - * - * Assign the entire URB to the VS. Even though the VS disabled, URB space - * is still needed because the clipper loads the VUE's from the URB. 
From - * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE, - * Dword 1.15:0 "VS Number of URB Entries": - * This field is always used (even if VS Function Enable is DISABLED). - * - * The warning below appears in the PRM (Section 3DSTATE_URB), but we can - * safely ignore it because this batch contains only one draw call. - * Because of URB corruption caused by allocating a previous GS unit - * URB entry to the VS unit, software is required to send a “GS NULL - * Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0) - * plus a dummy DRAW call before any case where VS will be taking over - * GS URB space. - */ - blorp_emit(brw, GENX(3DSTATE_URB), urb) { - urb.VSNumberofURBEntries = brw->urb.max_vs_entries; - } + emit_urb_config(brw, params); if (params->wm_prog_data) { blend_state_offset = blorp_emit_blend_state(brw, params); @@ -678,12 +909,17 @@ genX(blorp_exec)(struct brw_context *brw, } depth_stencil_state_offset = blorp_emit_depth_stencil_state(brw, params); +#if GEN_GEN <= 6 /* 3DSTATE_CC_STATE_POINTERS * * The pointer offsets are relative to * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. * * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE. + * + * The dynamic state emit helpers emit their own STATE_POINTERS packets on + * gen7+. However, on gen6 and earlier, they're all lumped together in + * one CC_STATE_POINTERS packet so we have to emit that here. 
*/ blorp_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), cc) { cc.BLEND_STATEChange = true; @@ -693,11 +929,23 @@ genX(blorp_exec)(struct brw_context *brw, cc.PointertoCOLOR_CALC_STATE = color_calc_state_offset; cc.PointertoDEPTH_STENCIL_STATE = depth_stencil_state_offset; } +#else + (void)blend_state_offset; + (void)color_calc_state_offset; + (void)depth_stencil_state_offset; +#endif blorp_emit(brw, GENX(3DSTATE_CONSTANT_VS), vs); +#if GEN_GEN >= 7 + blorp_emit(brw, GENX(3DSTATE_CONSTANT_HS), hs); + blorp_emit(brw, GENX(3DSTATE_CONSTANT_DS), DS); +#endif blorp_emit(brw, GENX(3DSTATE_CONSTANT_GS), gs); blorp_emit(brw, GENX(3DSTATE_CONSTANT_PS), ps); + if (brw->use_resource_streamer) + gen7_disable_hw_binding_tables(brw); + if (params->wm_prog_data) blorp_emit_surface_states(brw, params); @@ -721,6 +969,12 @@ genX(blorp_exec)(struct brw_context *brw, * We've already done one at the start of the BLORP operation. */ blorp_emit(brw, GENX(3DSTATE_VS), vs); +#if GEN_GEN >= 7 + blorp_emit(brw, GENX(3DSTATE_HS), hs); + blorp_emit(brw, GENX(3DSTATE_TE), te); + blorp_emit(brw, GENX(3DSTATE_DS), DS); + blorp_emit(brw, GENX(3DSTATE_STREAMOUT), so); +#endif blorp_emit(brw, GENX(3DSTATE_GS), gs); blorp_emit(brw, GENX(3DSTATE_CLIP), clip) { @@ -728,7 +982,7 @@ genX(blorp_exec)(struct brw_context *brw, } blorp_emit_sf_config(brw, params); - blorp_emit_wm_config(brw, params); + blorp_emit_ps_config(brw, params); blorp_emit_viewport_state(brw, params); -- cgit v1.1