From 7b36c68ba6899c7f30fd56b7ef07a78b027771ac Mon Sep 17 00:00:00 2001 From: Chad Versace Date: Thu, 26 Jan 2012 11:01:36 -0800 Subject: i965: Rewrite the HiZ op The HiZ op was implemented as a meta-op. This patch reimplements it by emitting a special HiZ batch. This fixes several known bugs, and likely a lot of undiscovered ones too. ==== Why the HiZ meta-op needed to die ==== The HiZ op was implemented as a meta-op, which caused lots of trouble. All other meta-ops occur as a result of some GL call (for example, glClear and glGenerateMipmap), but the HiZ meta-op was special. It was called in places that Mesa (in particular, the vbo and swrast modules) did not expect---and were not prepared for---state changes to occur (for example: glDraw; glCallList; within glBegin/End blocks; and within swrast_prepare_render as a result of intel_miptree_map). In an attempt to work around these unexpected state changes, I added two hooks in i965: - A hook for glDraw, located in brw_predraw_resolve_buffers (which is called in the glDraw path). This hook detected if a predraw resolve meta-op had occurred, and would hackishly repropagate some GL state if necessary. This ensured that the meta-op state changes would not interfere with the vbo module's subsequent execution of glDraw. - A hook for glBegin, implemented by brwPrepareExecBegin. This hook resolved all buffers before entering a glBegin/End block, thus preventing an infinitely recurring call to vbo_exec_FlushVertices. The vbo module calls vbo_exec_FlushVertices to flush its vertex queue in response to GL state changes. Unfortunately, these hooks were not sufficient. The meta-op state changes still interacted badly with glPopAttrib (as discovered in bug 44927) and with swrast rendering (as discovered by debugging gen6's swrast fallback for glBitmap). I expect there are more undiscovered bugs. Rather than play whack-a-mole in a minefield, the sane approach is to replace the HiZ meta-op with something safer. 
==== How it was killed ==== This patch consists of several logical components: 1. Rewrite the HiZ op by replacing function gen6_resolve_slice with gen6_hiz_exec and gen7_hiz_exec. The new functions do not call a meta-op, but instead manually construct and emit a batch to "draw" the HiZ op's rectangle primitive. The new functions alter no GL state. 2. Add fields to brw_context::hiz for the new HiZ op. 3. Emit a workaround flush when toggling 3DSTATE_VS.VsFunctionEnable. 4. Kill all dead HiZ code: - the function gen6_resolve_slice - the dirty flag BRW_NEW_HIZ - the dead fields in brw_context::hiz - the state packet manipulation triggered by the now removed brw_context::hiz::op - the meta-op workaround in brw_predraw_resolve_buffers (discussed above) - the meta-op workaround brwPrepareExecBegin (discussed above) Note: This is a candidate for the 8.0 branch. Reviewed-by: Eric Anholt Reviewed-by: Kenneth Graunke Acked-by: Paul Berry Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=43327 Reported-by: xunx.fang@intel.com Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=44927 Reported-by: chao.a.chen@intel.com Signed-off-by: Chad Versace --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_context.c | 55 -- src/mesa/drivers/dri/i965/brw_context.h | 40 +- src/mesa/drivers/dri/i965/brw_draw.c | 47 +- src/mesa/drivers/dri/i965/brw_state_upload.c | 1 - src/mesa/drivers/dri/i965/brw_vtbl.c | 14 +- src/mesa/drivers/dri/i965/gen6_clip_state.c | 20 +- src/mesa/drivers/dri/i965/gen6_depthstencil.c | 9 +- src/mesa/drivers/dri/i965/gen6_hiz.c | 830 +++++++++++++++++--------- src/mesa/drivers/dri/i965/gen6_hiz.h | 38 ++ src/mesa/drivers/dri/i965/gen6_sf_state.c | 16 +- src/mesa/drivers/dri/i965/gen6_vs_state.c | 9 + src/mesa/drivers/dri/i965/gen6_wm_state.c | 20 +- src/mesa/drivers/dri/i965/gen7_clip_state.c | 20 +- src/mesa/drivers/dri/i965/gen7_hiz.c | 463 ++++++++++++++ src/mesa/drivers/dri/i965/gen7_hiz.h | 43 ++ 
src/mesa/drivers/dri/i965/gen7_sf_state.c | 19 +- src/mesa/drivers/dri/i965/gen7_wm_state.c | 18 - src/mesa/drivers/dri/i965/junk | 0 19 files changed, 1146 insertions(+), 517 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/gen7_hiz.c create mode 100644 src/mesa/drivers/dri/i965/gen7_hiz.h create mode 100644 src/mesa/drivers/dri/i965/junk (limited to 'src/mesa/drivers/dri/i965') diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 355bfe2..750be51 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -100,6 +100,7 @@ i965_C_FILES := \ gen7_cc_state.c \ gen7_clip_state.c \ gen7_disable.c \ + gen7_hiz.c \ gen7_misc_state.c \ gen7_sampler_state.c \ gen7_sf_state.c \ diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 1ab6310..65de260 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -41,8 +41,6 @@ #include "brw_draw.h" #include "brw_state.h" -#include "gen6_hiz.h" - #include "intel_fbo.h" #include "intel_mipmap_tree.h" #include "intel_regions.h" @@ -57,58 +55,6 @@ * Mesa's Driver Functions ***************************************/ -/** - * \brief Prepare for entry into glBegin/glEnd block. - * - * Resolve buffers before entering a glBegin/glEnd block. This is - * necessary to prevent recursive calls to FLUSH_VERTICES. - * - * This resolves the depth buffer of each enabled depth texture and the HiZ - * buffer of the attached depth renderbuffer. - * - * Details - * ------- - * When vertices are queued during a glBegin/glEnd block, those vertices must - * be drawn before any rendering state changes. To ensure this, Mesa calls - * FLUSH_VERTICES as a prehook to such state changes. Therefore, - * FLUSH_VERTICES itself cannot change rendering state without falling into a - * recursive trap. 
- * - * This precludes meta-ops, namely buffer resolves, from occurring while any - * vertices are queued. To prevent that situation, we resolve some buffers on - * entering a glBegin/glEnd - * - * \see brwCleanupExecEnd() - */ -static void brwPrepareExecBegin(struct gl_context *ctx) -{ - struct brw_context *brw = brw_context(ctx); - struct intel_context *intel = &brw->intel; - struct intel_renderbuffer *draw_irb; - struct intel_texture_object *tex_obj; - - if (!intel->has_hiz) { - /* The context uses no feature that requires buffer resolves. */ - return; - } - - /* Resolve each enabled texture. */ - for (int i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { - if (!ctx->Texture.Unit[i]._ReallyEnabled) - continue; - tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current); - if (!tex_obj || !tex_obj->mt) - continue; - intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt); - } - - /* Resolve the attached depth buffer. */ - draw_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); - if (draw_irb) { - intel_renderbuffer_resolve_hiz(intel, draw_irb); - } -} - static void brwInitDriverFunctions(struct intel_screen *screen, struct dd_function_table *functions) { @@ -117,7 +63,6 @@ static void brwInitDriverFunctions(struct intel_screen *screen, brwInitFragProgFuncs( functions ); brw_init_queryobj_functions(functions); - functions->PrepareExecBegin = brwPrepareExecBegin; functions->BeginTransformFeedback = brw_begin_transform_feedback; if (screen->gen >= 7) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index c027bef..72e5059 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -119,6 +119,10 @@ #define BRW_MAX_CURBE (32*16) struct brw_context; +struct brw_instruction; +struct brw_vs_prog_key; +struct brw_wm_prog_key; +struct brw_wm_prog_data; enum brw_state_id { BRW_STATE_URB_FENCE, @@ -144,7 +148,6 @@ enum brw_state_id { BRW_STATE_VS_CONSTBUF, 
BRW_STATE_PROGRAM_CACHE, BRW_STATE_STATE_BASE_ADDRESS, - BRW_STATE_HIZ, BRW_STATE_SOL_INDICES, }; @@ -174,7 +177,6 @@ enum brw_state_id { #define BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF) #define BRW_NEW_PROGRAM_CACHE (1 << BRW_STATE_PROGRAM_CACHE) #define BRW_NEW_STATE_BASE_ADDRESS (1 << BRW_STATE_STATE_BASE_ADDRESS) -#define BRW_NEW_HIZ (1 << BRW_STATE_HIZ) #define BRW_NEW_SOL_INDICES (1 << BRW_STATE_SOL_INDICES) struct brw_state_flags { @@ -950,38 +952,18 @@ struct brw_context int state_batch_count; /** - * \brief State needed to execute HiZ meta-ops + * \brief State needed to execute HiZ ops. * - * All fields except \c op are initialized by gen6_hiz_init(). + * \see gen6_hiz_init() + * \see gen6_hiz_exec() */ struct brw_hiz_state { - /** - * \brief Indicates which HiZ operation is in progress. + /** \brief VBO for rectangle primitive. * - * See the following sections of the Sandy Bridge PRM, Volume 1, Part2: - * - 7.5.3.1 Depth Buffer Clear - * - 7.5.3.2 Depth Buffer Resolve - * - 7.5.3.3 Hierarchical Depth Buffer Resolve + * Rather than using glGenBuffers(), we allocate the VBO directly + * through drm. */ - enum brw_hiz_op { - BRW_HIZ_OP_NONE = 0, - BRW_HIZ_OP_DEPTH_CLEAR, - BRW_HIZ_OP_DEPTH_RESOLVE, - BRW_HIZ_OP_HIZ_RESOLVE, - } op; - - /** \brief Shader state */ - struct { - GLuint program; - GLuint position_vbo; - GLint position_location; - } shader; - - /** \brief VAO for the rectangle primitive's vertices. 
*/ - GLuint vao; - - GLuint fbo; - struct gl_renderbuffer *depth_rb; + drm_intel_bo *vertex_bo; } hiz; struct brw_sol_state { diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index f50fffd..e919f3e 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -126,12 +126,7 @@ static void gen6_set_prim(struct brw_context *brw, DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode)); - if (brw->hiz.op) { - assert(prim->mode == GL_TRIANGLES); - hw_prim = _3DPRIM_RECTLIST; - } else { - hw_prim = prim_to_hw_prim[prim->mode]; - } + hw_prim = prim_to_hw_prim[prim->mode]; if (hw_prim != brw->primitive) { brw->primitive = hw_prim; @@ -307,17 +302,11 @@ brw_predraw_resolve_buffers(struct brw_context *brw) struct intel_context *intel = &brw->intel; struct intel_renderbuffer *depth_irb; struct intel_texture_object *tex_obj; - bool did_resolve = false; - - /* Avoid recursive HiZ op. */ - if (brw->hiz.op) { - return; - } /* Resolve the depth buffer's HiZ buffer. */ depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); if (depth_irb && depth_irb->mt) { - did_resolve |= intel_renderbuffer_resolve_hiz(intel, depth_irb); + intel_renderbuffer_resolve_hiz(intel, depth_irb); } /* Resolve depth buffer of each enabled depth texture. */ @@ -327,33 +316,7 @@ brw_predraw_resolve_buffers(struct brw_context *brw) tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current); if (!tex_obj || !tex_obj->mt) continue; - did_resolve |= intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt); - } - - if (did_resolve) { - /* Call vbo_bind_array() to synchronize the vbo module's vertex - * attributes to the gl_context's. - * - * Details - * ------- - * The vbo module tracks vertex attributes separately from the - * gl_context. 
Specifically, the vbo module maintins vertex attributes - * in vbo_exec_context::array::inputs, which is synchronized with - * gl_context::Array::ArrayObj::VertexAttrib by vbo_bind_array(). - * vbo_draw_arrays() calls vbo_bind_array() to perform the - * synchronization before calling the real draw call, - * vbo_context::draw_arrays. - * - * At this point (after performing a resolve meta-op but before calling - * vbo_bind_array), the gl_context's vertex attributes have been - * restored to their original state (that is, their state before the - * meta-op began), but the vbo module's vertex attribute are those used - * in the last meta-op. Therefore we must manually synchronize the two with - * vbo_bind_array() before continuing with the original draw command. - */ - _mesa_update_state(ctx); - vbo_bind_arrays(ctx); - _mesa_update_state(ctx); + intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt); } } @@ -372,9 +335,7 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH); - if (depth_irb && - ctx->Depth.Mask && - !brw->hiz.op) { + if (depth_irb && ctx->Depth.Mask) { intel_renderbuffer_set_needs_depth_resolve(depth_irb); } } diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index d071f87..f5e6fdc 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -372,7 +372,6 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE), DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE), DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS), - DEFINE_BIT(BRW_NEW_HIZ), {0, 0, 0} }; diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index be975d1..724111c 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -50,6 +50,7 @@ #include "brw_wm.h" #include "gen6_hiz.h" +#include "gen7_hiz.h" 
#include "glsl/ralloc.h" @@ -70,9 +71,11 @@ static void brw_destroy_context( struct intel_context *intel ) brw_destroy_state(brw); brw_draw_destroy( brw ); + ralloc_free(brw->wm.compile_data); dri_bo_release(&brw->curbe.curbe_bo); + dri_bo_release(&brw->hiz.vertex_bo); dri_bo_release(&brw->vs.const_bo); dri_bo_release(&brw->wm.const_bo); @@ -236,8 +239,15 @@ void brwInitVtbl( struct brw_context *brw ) brw->intel.vtbl.is_hiz_depth_format = brw_is_hiz_depth_format; if (brw->intel.has_hiz) { - brw->intel.vtbl.resolve_depth_slice = gen6_resolve_depth_slice; - brw->intel.vtbl.resolve_hiz_slice = gen6_resolve_hiz_slice; + if (brw->intel.gen == 7) { + brw->intel.vtbl.resolve_depth_slice = gen7_resolve_depth_slice; + brw->intel.vtbl.resolve_hiz_slice = gen7_resolve_hiz_slice; + } else if (brw->intel.gen == 6) { + brw->intel.vtbl.resolve_depth_slice = gen6_resolve_depth_slice; + brw->intel.vtbl.resolve_hiz_slice = gen6_resolve_hiz_slice; + } else { + assert(0); + } } if (brw->intel.gen >= 7) { diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index d2a5f75..b3bb8ae 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -67,23 +67,6 @@ upload_clip_state(struct brw_context *brw) GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE; } - if (brw->hiz.op) { - /* HiZ operations emit a rectangle primitive, which requires clipping to - * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1 - * Section 1.3 3D Primitives Overview: - * RECTLIST: - * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip - * Mode should be set to a value other than CLIPMODE_NORMAL. 
- */ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - return; - } - if (!ctx->Transform.DepthClamp) depth_clamp = GEN6_CLIP_Z_TEST; @@ -124,8 +107,7 @@ const struct brw_tracked_state gen6_clip_state = { .dirty = { .mesa = _NEW_TRANSFORM | _NEW_LIGHT, .brw = (BRW_NEW_CONTEXT | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_HIZ), + BRW_NEW_FRAGMENT_PROGRAM), .cache = 0 }, .emit = upload_clip_state, diff --git a/src/mesa/drivers/dri/i965/gen6_depthstencil.c b/src/mesa/drivers/dri/i965/gen6_depthstencil.c index d9f686a..4ea517f 100644 --- a/src/mesa/drivers/dri/i965/gen6_depthstencil.c +++ b/src/mesa/drivers/dri/i965/gen6_depthstencil.c @@ -82,11 +82,7 @@ gen6_upload_depth_stencil_state(struct brw_context *brw) } /* _NEW_DEPTH */ - if ((ctx->Depth.Test || brw->hiz.op) && depth_irb) { - assert(brw->hiz.op != BRW_HIZ_OP_DEPTH_RESOLVE || ctx->Depth.Test); - assert(brw->hiz.op != BRW_HIZ_OP_HIZ_RESOLVE || !ctx->Depth.Test); - assert(brw->hiz.op != BRW_HIZ_OP_DEPTH_CLEAR || !ctx->Depth.Test); - + if (ctx->Depth.Test && depth_irb) { ds->ds2.depth_test_enable = ctx->Depth.Test; ds->ds2.depth_test_func = intel_translate_compare_func(ctx->Depth.Func); ds->ds2.depth_write_enable = ctx->Depth.Mask; @@ -98,8 +94,7 @@ gen6_upload_depth_stencil_state(struct brw_context *brw) const struct brw_tracked_state gen6_depth_stencil_state = { .dirty = { .mesa = _NEW_DEPTH | _NEW_STENCIL | _NEW_BUFFERS, - .brw = (BRW_NEW_BATCH | - BRW_NEW_HIZ), + .brw = BRW_NEW_BATCH, .cache = 0, }, .emit = gen6_upload_depth_stencil_state, diff --git a/src/mesa/drivers/dri/i965/gen6_hiz.c b/src/mesa/drivers/dri/i965/gen6_hiz.c index d7698ed..3bbd010 100644 --- a/src/mesa/drivers/dri/i965/gen6_hiz.c +++ b/src/mesa/drivers/dri/i965/gen6_hiz.c @@ -21,345 +21,621 @@ * IN THE SOFTWARE. 
*/ -#include "gen6_hiz.h" - #include -#include "mesa/drivers/common/meta.h" - -#include "mesa/main/arrayobj.h" -#include "mesa/main/bufferobj.h" -#include "mesa/main/depth.h" -#include "mesa/main/enable.h" -#include "mesa/main/fbobject.h" -#include "mesa/main/framebuffer.h" -#include "mesa/main/get.h" -#include "mesa/main/renderbuffer.h" -#include "mesa/main/shaderapi.h" -#include "mesa/main/varray.h" - +#include "intel_batchbuffer.h" #include "intel_fbo.h" #include "intel_mipmap_tree.h" -#include "intel_regions.h" -#include "intel_tex.h" #include "brw_context.h" #include "brw_defines.h" +#include "brw_state.h" -static const uint32_t gen6_hiz_meta_save = - - /* Disable alpha, depth, and stencil test. - * - * See the following sections of the Sandy Bridge PRM, Volume 1, Part2: - * - 7.5.3.1 Depth Buffer Clear - * - 7.5.3.2 Depth Buffer Resolve - * - 7.5.3.3 Hierarchical Depth Buffer Resolve - */ - MESA_META_ALPHA_TEST | - MESA_META_DEPTH_TEST | - MESA_META_STENCIL_TEST | - - /* Disable viewport mapping. - * - * From page 11 of the Sandy Bridge PRM, Volume 2, Part 1, Section 1.3 - * 3D Primitives Overview: - * RECTLIST: - * Viewport Mapping must be DISABLED (as is typical with the use of - * screen- space coordinates). - * - * We must also manually disable 3DSTATE_SF.Viewport_Transform_Enable. - */ - MESA_META_VIEWPORT | - - /* Disable clipping. - * - * From page 11 of the Sandy Bridge PRM, Volume 2, Part 1, Section 1.3 - * 3D Primitives Overview: - * Either the CLIP unit should be DISABLED, or the CLIP unit’s Clip - * Mode should be set to a value other than CLIPMODE_NORMAL. - */ - MESA_META_CLIP | - - /* Render a solid rectangle (set 3DSTATE_SF.FrontFace_Fill_Mode). - * - * From page 249 of the Sandy Bridge PRM, Volume 2, Part 1, Section - * 6.4.1.1 3DSTATE_SF, FrontFace_Fill_Mode: - * SOLID: Any triangle or rectangle object found to be front-facing - * is rendered as a solid object. This setting is required when - * (rendering rectangle (RECTLIST) objects. 
- * Also see field BackFace_Fill_Mode. - * - * Note: MESA_META_RASTERIZAION also disables culling, but that is - * irrelevant. See 3DSTATE_SF.Cull_Mode. - */ - MESA_META_RASTERIZATION | - - /* Each HiZ operation uses a vertex shader and VAO. */ - MESA_META_SHADER | - MESA_META_VERTEX | - - /* Disable scissoring. - * - * Scissoring is disabled for resolves because a resolve operation - * should resolve the entire buffer. Scissoring is disabled for depth - * clears because, if we are performing a partial depth clear, then we - * specify the clear region with the RECTLIST vertices. - */ - MESA_META_SCISSOR | - - MESA_META_SELECT_FEEDBACK; +#include "gen6_hiz.h" -static void -gen6_hiz_get_framebuffer_enum(struct gl_context *ctx, - GLenum *bind_enum, - GLenum *get_enum) -{ - if (ctx->Extensions.EXT_framebuffer_blit && ctx->API == API_OPENGL) { - /* Different buffers may be bound to GL_DRAW_FRAMEBUFFER and - * GL_READ_FRAMEBUFFER. Take care to not disrupt the read buffer. - */ - *bind_enum = GL_DRAW_FRAMEBUFFER; - *get_enum = GL_DRAW_FRAMEBUFFER_BINDING; - } else { - /* The enums GL_DRAW_FRAMEBUFFER and GL_READ_FRAMEBUFFER do not exist. - * The bound framebuffer is both the read and draw buffer. - */ - *bind_enum = GL_FRAMEBUFFER; - *get_enum = GL_FRAMEBUFFER_BINDING; - } -} +/** + * \name Constants for HiZ VBO + * \{ + * + * \see brw_context::hiz::vertex_bo + */ +#define GEN6_HIZ_NUM_VERTICES 3 +#define GEN6_HIZ_NUM_VUE_ELEMS 8 +#define GEN6_HIZ_VBO_SIZE (GEN6_HIZ_NUM_VERTICES \ + * GEN6_HIZ_NUM_VUE_ELEMS \ + * sizeof(float)) +/** \} */ /** - * Initialize static data needed for HiZ operations. + * \brief Initialize data needed for the HiZ op. + * + * This called when executing the first HiZ op. 
+ * \see brw_context::hiz */ -static void +void gen6_hiz_init(struct brw_context *brw) { struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; struct brw_hiz_state *hiz = &brw->hiz; - GLenum fb_bind_enum, fb_get_enum; - if (hiz->fbo != 0) - return; + hiz->vertex_bo = drm_intel_bo_alloc(intel->bufmgr, "bufferobj", + GEN6_HIZ_VBO_SIZE, /* size */ + 64); /* alignment */ - gen6_hiz_get_framebuffer_enum(ctx, &fb_bind_enum, &fb_get_enum); + if (!hiz->vertex_bo) + _mesa_error(ctx, GL_OUT_OF_MEMORY, "failed to allocate internal VBO"); +} - /* Create depthbuffer. +void +gen6_hiz_emit_batch_head(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + struct brw_hiz_state *hiz = &brw->hiz; + + /* To ensure that the batch contains only the resolve, flush the batch + * before beginning and after finishing emitting the resolve packets. * - * Until glRenderbufferStorage is called, the renderbuffer hash table - * maps the renderbuffer name to a dummy renderbuffer. We need the - * renderbuffer to be registered in the hash table so that framebuffer - * validation succeeds, so we hackishly allocate storage then immediately - * discard it. + * Ideally, we would not need to flush for the resolve op. But, I suspect + * that it's unsafe for CMD_PIPELINE_SELECT to occur multiple times in + * a single batch, and there is no safe way to ensure that other than by + * fencing the resolve with flushes. Ideally, we would just detect if + * a batch is in progress and do the right thing, but that would require + * the ability to *safely* access brw_context::state::dirty::brw + * outside of the brw_upload_state() codepath. 
*/ - GLuint depth_rb_name; - _mesa_GenRenderbuffersEXT(1, &depth_rb_name); - _mesa_BindRenderbufferEXT(GL_RENDERBUFFER, depth_rb_name); - _mesa_RenderbufferStorageEXT(GL_RENDERBUFFER, GL_DEPTH_COMPONENT, 32, 32); - _mesa_reference_renderbuffer(&hiz->depth_rb, - _mesa_lookup_renderbuffer(ctx, depth_rb_name)); - intel_miptree_release(&((struct intel_renderbuffer*) hiz->depth_rb)->mt); - - /* Setup FBO. */ - _mesa_GenFramebuffersEXT(1, &hiz->fbo); - _mesa_BindFramebufferEXT(fb_bind_enum, hiz->fbo); - _mesa_FramebufferRenderbufferEXT(fb_bind_enum, - GL_DEPTH_ATTACHMENT, - GL_RENDERBUFFER, - hiz->depth_rb->Name); - - /* Compile vertex shader. */ - const char *vs_source = - "attribute vec4 position;\n" - "void main()\n" - "{\n" - " gl_Position = position;\n" - "}\n"; - GLuint vs = _mesa_CreateShaderObjectARB(GL_VERTEX_SHADER); - _mesa_ShaderSourceARB(vs, 1, &vs_source, NULL); - _mesa_CompileShaderARB(vs); - - /* Compile fragment shader. */ - const char *fs_source = "void main() {}"; - GLuint fs = _mesa_CreateShaderObjectARB(GL_FRAGMENT_SHADER); - _mesa_ShaderSourceARB(fs, 1, &fs_source, NULL); - _mesa_CompileShaderARB(fs); - - /* Link and use program. */ - hiz->shader.program = _mesa_CreateProgramObjectARB(); - _mesa_AttachShader(hiz->shader.program, vs); - _mesa_AttachShader(hiz->shader.program, fs); - _mesa_LinkProgramARB(hiz->shader.program); - _mesa_UseProgramObjectARB(hiz->shader.program); - - /* Create and bind VAO. */ - _mesa_GenVertexArrays(1, &hiz->vao); - _mesa_BindVertexArray(hiz->vao); - - /* Setup VBO for 'position'. 
*/ - hiz->shader.position_location = - _mesa_GetAttribLocationARB(hiz->shader.program, "position"); - _mesa_GenBuffersARB(1, &hiz->shader.position_vbo); - _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, hiz->shader.position_vbo); - _mesa_VertexAttribPointerARB(hiz->shader.position_location, - 2, /*components*/ - GL_FLOAT, - GL_FALSE, /*normalized?*/ - 0, /*stride*/ - NULL); - _mesa_EnableVertexAttribArrayARB(hiz->shader.position_location); - - /* Cleanup. */ - _mesa_DeleteShader(vs); - _mesa_DeleteShader(fs); + intel_flush(ctx); + + /* CMD_PIPELINE_SELECT + * + * Select the 3D pipeline, as opposed to the media pipeline. + */ + { + BEGIN_BATCH(1); + OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16); + ADVANCE_BATCH(); + } + + /* 3DSTATE_MULTISAMPLE */ + { + int length = intel->gen == 7 ? 4 : 3; + + BEGIN_BATCH(length); + OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (3 - 2)); + OUT_BATCH(MS_PIXEL_LOCATION_CENTER | + MS_NUMSAMPLES_1); + OUT_BATCH(0); + if (length >= 4) + OUT_BATCH(0); + ADVANCE_BATCH(); + + } + + /* 3DSTATE_SAMPLE_MASK */ + { + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2)); + OUT_BATCH(1); + ADVANCE_BATCH(); + } + + /* CMD_STATE_BASE_ADDRESS + * + * From the Sandy Bridge PRM, Volume 1, Part 1, Table STATE_BASE_ADDRESS: + * The following commands must be reissued following any change to the + * base addresses: + * 3DSTATE_CC_POINTERS + * 3DSTATE_BINDING_TABLE_POINTERS + * 3DSTATE_SAMPLER_STATE_POINTERS + * 3DSTATE_VIEWPORT_STATE_POINTERS + * MEDIA_STATE_POINTERS + */ + { + BEGIN_BATCH(10); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2)); + OUT_BATCH(1); /* GeneralStateBaseAddressModifyEnable */ + /* SurfaceStateBaseAddress */ + OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1); + /* DynamicStateBaseAddress */ + OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER | + I915_GEM_DOMAIN_INSTRUCTION), 0, 1); + OUT_BATCH(1); /* IndirectObjectBaseAddress */ + OUT_BATCH(1); /* InstructionBaseAddress */ + OUT_BATCH(1); /* GeneralStateUpperBound */ + 
OUT_BATCH(1); /* DynamicStateUpperBound */ + OUT_BATCH(1); /* IndirectObjectUpperBound*/ + OUT_BATCH(1); /* InstructionAccessUpperBound */ + ADVANCE_BATCH(); + } } -/** - * Wrap \c brw->hiz.depth_rb around a miptree. - * - * \see gen6_hiz_teardown_depth_buffer() - */ -static void -gen6_hiz_setup_depth_buffer(struct brw_context *brw, - struct intel_mipmap_tree *mt, - unsigned int level, - unsigned int layer) +void +gen6_hiz_emit_vertices(struct brw_context *brw, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int layer) { - struct gl_renderbuffer *rb = brw->hiz.depth_rb; - struct intel_renderbuffer *irb = intel_renderbuffer(rb); + struct intel_context *intel = &brw->intel; + struct brw_hiz_state *hiz = &brw->hiz; - rb->Format = mt->format; - rb->_BaseFormat = _mesa_get_format_base_format(rb->Format); - rb->InternalFormat = rb->_BaseFormat; - rb->Width = mt->level[level].width; - rb->Height = mt->level[level].height; + /* Setup VBO for the rectangle primitive.. + * + * A rectangle primitive (3DPRIM_RECTLIST) consists of only three + * vertices. The vertices reside in screen space with DirectX coordinates + * (that is, (0, 0) is the upper left corner). + * + * v2 ------ implied + * | | + * | | + * v0 ----- v1 + * + * Since the VS is disabled, the clipper loads each VUE directly from + * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and + * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows: + * dw0: Reserved, MBZ. + * dw1: Render Target Array Index. The HiZ op does not use indexed + * vertices, so set the dword to 0. + * dw2: Viewport Index. The HiZ op disables viewport mapping and + * scissoring, so set the dword to 0. + * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, so + * set the dword to 0. + * dw4: Vertex Position X. + * dw5: Vertex Position Y. + * dw6: Vertex Position Z. + * dw7: Vertex Position W. 
+ * + * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1 + * "Vertex URB Entry (VUE) Formats". + */ + { + const int width = mt->level[level].width; + const int height = mt->level[level].height; - irb->mt_level = level; - irb->mt_layer = layer; + const float vertices[GEN6_HIZ_VBO_SIZE] = { + /* v0 */ 0, 0, 0, 0, 0, height, 0, 1, + /* v1 */ 0, 0, 0, 0, width, height, 0, 1, + /* v2 */ 0, 0, 0, 0, 0, 0, 0, 1, + }; - intel_miptree_reference(&irb->mt, mt); - intel_renderbuffer_set_draw_offset(irb); + drm_intel_bo_subdata(hiz->vertex_bo, 0, GEN6_HIZ_VBO_SIZE, vertices); + } + + /* 3DSTATE_VERTEX_BUFFERS */ + { + const int num_buffers = 1; + const int batch_length = 1 + 4 * num_buffers; + + uint32_t dw0 = GEN6_VB0_ACCESS_VERTEXDATA | + (GEN6_HIZ_NUM_VUE_ELEMS * sizeof(float)) << BRW_VB0_PITCH_SHIFT; + + if (intel->gen >= 7) + dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; + + BEGIN_BATCH(batch_length); + OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2)); + OUT_BATCH(dw0); + /* start address */ + OUT_RELOC(hiz->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); + /* end address */ + OUT_RELOC(hiz->vertex_bo, I915_GEM_DOMAIN_VERTEX, + 0, hiz->vertex_bo->size - 1); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_VERTEX_ELEMENTS + * + * Fetch dwords 0 - 7 from each VUE. See the comments above where + * hiz->vertex_bo is filled with data. 
+ */ + { + const int num_elements = 2; + const int batch_length = 1 + 2 * num_elements; + + BEGIN_BATCH(batch_length); + OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (batch_length - 2)); + /* Element 0 */ + OUT_BATCH(GEN6_VE0_VALID | + BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT | + 0 << BRW_VE0_SRC_OFFSET_SHIFT); + OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT); + /* Element 1 */ + OUT_BATCH(GEN6_VE0_VALID | + BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT | + 16 << BRW_VE0_SRC_OFFSET_SHIFT); + OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT); + ADVANCE_BATCH(); + } } /** - * Release the region from \c brw->hiz.depth_rb. + * \brief Execute a HiZ op on a miptree slice. + * + * To execute the HiZ op, this function manually constructs and emits a batch + * to "draw" the HiZ op's rectangle primitive. The batchbuffer is flushed + * before constructing and after emitting the batch. * - * \see gen6_hiz_setup_depth_buffer() + * This function alters no GL state. 
+ * + * For an overview of HiZ ops, see the following sections of the Sandy Bridge + * PRM, Volume 1, Part 2: + * - 7.5.3.1 Depth Buffer Clear + * - 7.5.3.2 Depth Buffer Resolve + * - 7.5.3.3 Hierarchical Depth Buffer Resolve */ static void -gen6_hiz_teardown_depth_buffer(struct gl_renderbuffer *rb) -{ - struct intel_renderbuffer *irb = intel_renderbuffer(rb); - intel_miptree_release(&irb->mt); -} - -static void -gen6_resolve_slice(struct intel_context *intel, - struct intel_mipmap_tree *mt, - unsigned int level, - unsigned int layer, - enum brw_hiz_op op) +gen6_hiz_exec(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int layer, + enum gen6_hiz_op op) { struct gl_context *ctx = &intel->ctx; struct brw_context *brw = brw_context(ctx); struct brw_hiz_state *hiz = &brw->hiz; - GLenum fb_bind_enum, fb_get_enum; - - /* Do not recurse. */ - assert(!brw->hiz.op); + assert(op != GEN6_HIZ_OP_DEPTH_CLEAR); /* Not implemented yet. */ assert(mt->hiz_mt != NULL); - assert(level >= mt->first_level); - assert(level <= mt->last_level); - assert(layer < mt->level[level].depth); - - gen6_hiz_get_framebuffer_enum(ctx, &fb_bind_enum, &fb_get_enum); - - /* Save state. */ - GLint save_drawbuffer; - GLint save_renderbuffer; - _mesa_meta_begin(ctx, gen6_hiz_meta_save); - _mesa_GetIntegerv(fb_get_enum, &save_drawbuffer); - _mesa_GetIntegerv(GL_RENDERBUFFER_BINDING, &save_renderbuffer); - - /* Initialize context data for HiZ operations. */ - gen6_hiz_init(brw); - - /* Set depth state. */ - if (!ctx->Depth.Mask) { - /* This sets 3DSTATE_WM.Depth_Buffer_Write_Enable. */ - _mesa_DepthMask(GL_TRUE); + intel_miptree_check_level_layer(mt, level, layer); + + if (hiz->vertex_bo == NULL) + gen6_hiz_init(brw); + + if (hiz->vertex_bo == NULL) { + /* Ouch. Give up. 
*/ + return; } - if (op == BRW_HIZ_OP_DEPTH_RESOLVE) { - _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_TRUE); - _mesa_DepthFunc(GL_NEVER); + + gen6_hiz_emit_batch_head(brw); + gen6_hiz_emit_vertices(brw, mt, level, layer); + + /* 3DSTATE_URB + * + * Assign the entire URB to the VS. Even though the VS is disabled, URB space + * is still needed because the clipper loads the VUE's from the URB. From + * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE, + * Dword 1.15:0 "VS Number of URB Entries": + * This field is always used (even if VS Function Enable is DISABLED). + * + * The warning below appears in the PRM (Section 3DSTATE_URB), but we can + * safely ignore it because this batch contains only one draw call. + * Because of URB corruption caused by allocating a previous GS unit + * URB entry to the VS unit, software is required to send a “GS NULL + * Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0) + * plus a dummy DRAW call before any case where VS will be taking over + * GS URB space. + */ + { + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2)); + OUT_BATCH(brw->urb.max_vs_entries << GEN6_URB_VS_ENTRIES_SHIFT); + OUT_BATCH(0); + ADVANCE_BATCH(); } - /* Setup FBO. */ - gen6_hiz_setup_depth_buffer(brw, mt, level, layer); - _mesa_BindFramebufferEXT(fb_bind_enum, hiz->fbo); + /* 3DSTATE_CC_STATE_POINTERS + * + * The pointer offsets are relative to + * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. + * + * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE. + */ + { + uint32_t depthstencil_offset; + gen6_hiz_emit_depth_stencil_state(brw, op, &depthstencil_offset); + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2)); + OUT_BATCH(1); /* BLEND_STATE offset */ + OUT_BATCH(depthstencil_offset | 1); /* DEPTH_STENCIL_STATE offset */ + OUT_BATCH(1); /* COLOR_CALC_STATE offset */ + ADVANCE_BATCH(); + } + /* 3DSTATE_VS + * + * Disable vertex shader. 
+ */ + { + /* From the BSpec, Volume 2a, Part 3 "Vertex Shader", Section + * 3DSTATE_VS, Dword 5.0 "VS Function Enable": + * [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS + * command that causes the VS Function Enable to toggle. Pipeline + * flush can be executed by sending a PIPE_CONTROL command with CS + * stall bit set and a post sync operation. + */ + intel_emit_post_sync_nonzero_flush(intel); + + BEGIN_BATCH(6); + OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } - /* A rectangle primitive (3DPRIM_RECTLIST) consists of only three vertices. - * The vertices reside in screen space with DirectX coordinates (this is, - * (0, 0) is the upper left corner). + /* 3DSTATE_GS * - * v2 ------ implied - * | | - * | | - * v0 ----- v1 + * Disable the geometry shader. */ - const int width = hiz->depth_rb->Width; - const int height = hiz->depth_rb->Height; - const GLfloat positions[] = { - 0, height, - width, height, - 0, 0, - }; - - /* Setup program and vertex attributes. */ - _mesa_UseProgramObjectARB(hiz->shader.program); - _mesa_BindVertexArray(hiz->vao); - _mesa_BindBufferARB(GL_ARRAY_BUFFER, hiz->shader.position_vbo); - _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(positions), positions, - GL_DYNAMIC_DRAW_ARB); - - /* Execute the HiZ operation. */ - brw->hiz.op = op; - brw->state.dirty.brw |= BRW_NEW_HIZ; - _mesa_DrawArrays(GL_TRIANGLES, 0, 3); - brw->state.dirty.brw |= BRW_NEW_HIZ; - brw->hiz.op = BRW_HIZ_OP_NONE; - - /* Restore state. + { + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_CLIP + * + * Disable the clipper. * - * The order in which state is restored is significant. 
The draw buffer - used for the HiZ op has no stencil buffer, and glStencilFunc() clamps - * the stencil reference value to the range allowed by the draw buffer's - * number of stencil bits. So, the draw buffer binding must be restored - * before the stencil state, or else the stencil ref will be clamped to 0. + * The HiZ op emits a rectangle primitive, which requires clipping to + * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1 + * Section 1.3 "3D Primitives Overview": + * RECTLIST: + * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip + * Mode should be set to a value other than CLIPMODE_NORMAL. + * + * Also disable perspective divide. This doesn't change the clipper's + * output, but does spare a few electrons. */ - gen6_hiz_teardown_depth_buffer(hiz->depth_rb); - _mesa_BindRenderbufferEXT(GL_RENDERBUFFER, save_renderbuffer); - _mesa_BindFramebufferEXT(fb_bind_enum, save_drawbuffer); - _mesa_meta_end(ctx); + { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_SF + * + * Disable ViewportTransformEnable (dw2.1) + * + * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D + * Primitives Overview": + * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the + * use of screen- space coordinates). + * + * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.6:5) + * and BackFaceFillMode (dw2.4:3) to SOLID(0). + * + * From the Sandy Bridge PRM, Volume 2, Part 1, Section + * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode: + * SOLID: Any triangle or rectangle object found to be front-facing + * is rendered as a solid object. This setting is required when + * (rendering rectangle (RECTLIST) objects. 
+ */ + { + BEGIN_BATCH(20); + OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2)); + OUT_BATCH((1 - 1) << GEN6_SF_NUM_OUTPUTS_SHIFT | /* only position */ + 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | + 0 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT); + for (int i = 0; i < 18; ++i) + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_WM + * + * Disable thread dispatch (dw5.19) and enable the HiZ op. + * + * Even though thread dispatch is disabled, max threads (dw5.25:31) must be + * nonzero to prevent the GPU from hanging. See the valid ranges in the + * BSpec, Volume 2a.11 Windower, Section 3DSTATE_WM, Dword 5.25:31 + * "Maximum Number Of Threads". + */ + { + uint32_t dw4 = 0; + + switch (op) { + case GEN6_HIZ_OP_DEPTH_CLEAR: + assert(!"not implemented"); + dw4 |= GEN6_WM_DEPTH_CLEAR; + break; + case GEN6_HIZ_OP_DEPTH_RESOLVE: + dw4 |= GEN6_WM_DEPTH_RESOLVE; + break; + case GEN6_HIZ_OP_HIZ_RESOLVE: + dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE; + break; + default: + assert(0); + break; + } + + BEGIN_BATCH(9); + OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(dw4); + OUT_BATCH((brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT); + OUT_BATCH((1 - 1) << GEN6_WM_NUM_SF_OUTPUTS_SHIFT); /* only position */ + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_DEPTH_BUFFER */ + { + uint32_t width = mt->level[level].width; + uint32_t height = mt->level[level].height; + + uint32_t tile_x; + uint32_t tile_y; + uint32_t offset; + { + /* Construct a dummy renderbuffer just to extract tile offsets. 
*/ + struct intel_renderbuffer rb; + rb.mt = mt; + rb.mt_level = level; + rb.mt_layer = layer; + intel_renderbuffer_set_draw_offset(&rb); + offset = intel_renderbuffer_tile_offsets(&rb, &tile_x, &tile_y); + } + + uint32_t format; + switch (mt->format) { + case MESA_FORMAT_Z16: format = BRW_DEPTHFORMAT_D16_UNORM; break; + case MESA_FORMAT_Z32_FLOAT: format = BRW_DEPTHFORMAT_D32_FLOAT; break; + case MESA_FORMAT_X8_Z24: format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; break; + default: assert(0); break; + } + + intel_emit_post_sync_nonzero_flush(intel); + intel_emit_depth_stall_flushes(intel); + + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); + OUT_BATCH(((mt->region->pitch * mt->region->cpp) - 1) | + format << 18 | + 1 << 21 | /* separate stencil enable */ + 1 << 22 | /* hiz enable */ + BRW_TILEWALK_YMAJOR << 26 | + 1 << 27 | /* y-tiled */ + BRW_SURFACE_2D << 29); + OUT_RELOC(mt->region->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + offset); + OUT_BATCH(BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1 | + (width + tile_x - 1) << 6 | + (height + tile_y - 1) << 19); + OUT_BATCH(0); + OUT_BATCH(tile_x | + tile_y << 16); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_HIER_DEPTH_BUFFER */ + { + struct intel_region *hiz_region = mt->hiz_mt->region; + + BEGIN_BATCH(3); + OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); + OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1); + OUT_RELOC(hiz_region->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_STENCIL_BUFFER */ + { + BEGIN_BATCH(3); + OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_CLEAR_PARAMS + * + * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS: + * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE + * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes. 
+ */ + { + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_DRAWING_RECTANGLE */ + { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(((mt->level[level].width - 1) & 0xffff) | + ((mt->level[level].height - 1) << 16)); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DPRIMITIVE */ + { + BEGIN_BATCH(6); + OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) | + _3DPRIM_RECTLIST << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT | + GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL); + OUT_BATCH(3); /* vertex count per instance */ + OUT_BATCH(0); + OUT_BATCH(1); /* instance count */ + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* See comments above at first invocation of intel_flush() in + * gen6_hiz_emit_batch_head(). + */ + intel_flush(ctx); + + /* Be safe. */ + brw->state.dirty.brw = ~0; + brw->state.dirty.cache = ~0; } +/** + * \param out_offset is relative to + * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. 
+ */ +void +gen6_hiz_emit_depth_stencil_state(struct brw_context *brw, + enum gen6_hiz_op op, + uint32_t *out_offset) +{ + struct gen6_depth_stencil_state *state; + state = brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE, + sizeof(*state), 64, + out_offset); + memset(state, 0, sizeof(*state)); + + /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2: + * - 7.5.3.1 Depth Buffer Clear + * - 7.5.3.2 Depth Buffer Resolve + * - 7.5.3.3 Hierarchical Depth Buffer Resolve + */ + state->ds2.depth_write_enable = 1; + if (op == GEN6_HIZ_OP_DEPTH_RESOLVE) { + state->ds2.depth_test_enable = 1; + state->ds2.depth_test_func = COMPAREFUNC_NEVER; + } +} + +/** \see intel_context::vtbl::resolve_hiz_slice */ void gen6_resolve_hiz_slice(struct intel_context *intel, struct intel_mipmap_tree *mt, uint32_t level, uint32_t layer) { - gen6_resolve_slice(intel, mt, level, layer, BRW_HIZ_OP_HIZ_RESOLVE); + gen6_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_HIZ_RESOLVE); } - +/** \see intel_context::vtbl::resolve_depth_slice */ void gen6_resolve_depth_slice(struct intel_context *intel, struct intel_mipmap_tree *mt, uint32_t level, uint32_t layer) { - gen6_resolve_slice(intel, mt, level, layer, BRW_HIZ_OP_DEPTH_RESOLVE); + gen6_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE); } diff --git a/src/mesa/drivers/dri/i965/gen6_hiz.h b/src/mesa/drivers/dri/i965/gen6_hiz.h index 4929012..0a13ba0 100644 --- a/src/mesa/drivers/dri/i965/gen6_hiz.h +++ b/src/mesa/drivers/dri/i965/gen6_hiz.h @@ -28,6 +28,44 @@ struct intel_context; struct intel_mipmap_tree; +/** + * For an overview of the HiZ operations, see the following sections of the + * Sandy Bridge PRM, Volume 1, Part2: + * - 7.5.3.1 Depth Buffer Clear + * - 7.5.3.2 Depth Buffer Resolve + * - 7.5.3.3 Hierarchical Depth Buffer Resolve + */ +enum gen6_hiz_op { + GEN6_HIZ_OP_DEPTH_CLEAR, + GEN6_HIZ_OP_DEPTH_RESOLVE, + GEN6_HIZ_OP_HIZ_RESOLVE, +}; + +/** + * \name HiZ internals + * \{ + * + * Used internally by 
gen6_hiz_exec() and gen7_hiz_exec(). + */ + +void +gen6_hiz_init(struct brw_context *brw); + +void +gen6_hiz_emit_batch_head(struct brw_context *brw); + +void +gen6_hiz_emit_vertices(struct brw_context *brw, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int layer); + +void +gen6_hiz_emit_depth_stencil_state(struct brw_context *brw, + enum gen6_hiz_op op, + uint32_t *out_offset); +/** \} */ + void gen6_resolve_hiz_slice(struct intel_context *intel, struct intel_mipmap_tree *mt, diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 163b54c..07b8e6d 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -149,17 +149,8 @@ upload_sf_state(struct brw_context *brw) urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; - dw2 = GEN6_SF_STATISTICS_ENABLE; - - /* Enable viewport transform only if no HiZ operation is progress - * - * From page 11 of the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D - * Primitives Overview": - * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the - * use of screen- space coordinates). 
- */ - if (!brw->hiz.op) - dw2 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE; + dw2 = GEN6_SF_STATISTICS_ENABLE | + GEN6_SF_VIEWPORT_TRANSFORM_ENABLE; dw3 = 0; dw4 = 0; @@ -354,8 +345,7 @@ const struct brw_tracked_state gen6_sf_state = { _NEW_POINT | _NEW_TRANSFORM), .brw = (BRW_NEW_CONTEXT | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_HIZ), + BRW_NEW_FRAGMENT_PROGRAM), .cache = CACHE_NEW_VS_PROG }, .emit = upload_sf_state, diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index 63efaa4..3392a9f 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -133,6 +133,15 @@ upload_vs_state(struct brw_context *brw) struct intel_context *intel = &brw->intel; uint32_t floating_point_mode = 0; + /* From the BSpec, Volume 2a, Part 3 "Vertex Shader", Section + * 3DSTATE_VS, Dword 5.0 "VS Function Enable": + * [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS + * command that causes the VS Function Enable to toggle. Pipeline + * flush can be executed by sending a PIPE_CONTROL command with CS + * stall bit set and a post sync operation. + */ + intel_emit_post_sync_nonzero_flush(intel); + if (brw->vs.push_const_size == 0) { /* Disable the push constant buffers. 
*/ BEGIN_BATCH(5); diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 3669811..205e648 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -149,23 +149,6 @@ upload_wm_state(struct brw_context *brw) dw4 |= (brw->wm.prog_data->first_curbe_grf_16 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2); - switch (brw->hiz.op) { - case BRW_HIZ_OP_NONE: - break; - case BRW_HIZ_OP_DEPTH_CLEAR: - dw4 |= GEN6_WM_DEPTH_CLEAR; - break; - case BRW_HIZ_OP_DEPTH_RESOLVE: - dw4 |= GEN6_WM_DEPTH_RESOLVE; - break; - case BRW_HIZ_OP_HIZ_RESOLVE: - dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE; - break; - default: - assert(0); - break; - } - dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT; /* CACHE_NEW_WM_PROG */ @@ -233,8 +216,7 @@ const struct brw_tracked_state gen6_wm_state = { _NEW_PROGRAM_CONSTANTS | _NEW_POLYGON), .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_BATCH | - BRW_NEW_HIZ), + BRW_NEW_BATCH), .cache = (CACHE_NEW_SAMPLER | CACHE_NEW_WM_PROG) }, diff --git a/src/mesa/drivers/dri/i965/gen7_clip_state.c b/src/mesa/drivers/dri/i965/gen7_clip_state.c index 9be3ce9..c32cd98 100644 --- a/src/mesa/drivers/dri/i965/gen7_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen7_clip_state.c @@ -39,23 +39,6 @@ upload_clip_state(struct brw_context *brw) /* BRW_NEW_FRAGMENT_PROGRAM */ const struct gl_fragment_program *fprog = brw->fragment_program; - if (brw->hiz.op) { - /* HiZ operations emit a rectangle primitive, which requires clipping to - * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1 - * Section 1.3 3D Primitives Overview: - * RECTLIST: - * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip - * Mode should be set to a value other than CLIPMODE_NORMAL. 
- */ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - return; - } - /* _NEW_BUFFERS */ bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; @@ -133,8 +116,7 @@ const struct brw_tracked_state gen7_clip_state = { _NEW_LIGHT | _NEW_TRANSFORM), .brw = (BRW_NEW_CONTEXT | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_HIZ), + BRW_NEW_FRAGMENT_PROGRAM), .cache = 0 }, .emit = upload_clip_state, diff --git a/src/mesa/drivers/dri/i965/gen7_hiz.c b/src/mesa/drivers/dri/i965/gen7_hiz.c new file mode 100644 index 0000000..271e61e --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen7_hiz.c @@ -0,0 +1,463 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <assert.h> + +#include "intel_batchbuffer.h" +#include "intel_fbo.h" +#include "intel_mipmap_tree.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" + +#include "gen6_hiz.h" +#include "gen7_hiz.h" + +/** + * \copydoc gen6_hiz_exec() + */ +static void +gen7_hiz_exec(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int layer, + enum gen6_hiz_op op) +{ + struct gl_context *ctx = &intel->ctx; + struct brw_context *brw = brw_context(ctx); + struct brw_hiz_state *hiz = &brw->hiz; + + assert(op != GEN6_HIZ_OP_DEPTH_CLEAR); /* Not implemented yet. */ + assert(mt->hiz_mt != NULL); + intel_miptree_check_level_layer(mt, level, layer); + + if (hiz->vertex_bo == NULL) + gen6_hiz_init(brw); + + if (hiz->vertex_bo == NULL) { + /* Ouch. Give up. */ + return; + } + + uint32_t depth_format; + switch (mt->format) { + case MESA_FORMAT_Z16: depth_format = BRW_DEPTHFORMAT_D16_UNORM; break; + case MESA_FORMAT_Z32_FLOAT: depth_format = BRW_DEPTHFORMAT_D32_FLOAT; break; + case MESA_FORMAT_X8_Z24: depth_format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; break; + default: assert(0); break; + } + + gen6_hiz_emit_batch_head(brw); + gen6_hiz_emit_vertices(brw, mt, level, layer); + + /* 3DSTATE_URB_VS + * 3DSTATE_URB_HS + * 3DSTATE_URB_DS + * 3DSTATE_URB_GS + * + * If the 3DSTATE_URB_VS is emitted, then the others must be also. From the + * BSpec, Volume 2a "3D Pipeline Overview", Section 1.7.1 3DSTATE_URB_VS: + * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be + * programmed in order for the programming of this state to be + * valid. + */ + { + /* The minimum valid value is 32. See 3DSTATE_URB_VS, + * Dword 1.15:0 "VS Number of URB Entries". 
+ */ + int num_vs_entries = 32; + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_VS << 16 | (2 - 2)); + OUT_BATCH(1 << GEN7_URB_ENTRY_SIZE_SHIFT | + 0 << GEN7_URB_STARTING_ADDRESS_SHIFT | + num_vs_entries); + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_GS << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_HS << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_DS << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS + * + * The offset is relative to CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. + */ + { + uint32_t depthstencil_offset; + gen6_hiz_emit_depth_stencil_state(brw, op, &depthstencil_offset); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_DEPTH_STENCIL_STATE_POINTERS << 16 | (2 - 2)); + OUT_BATCH(depthstencil_offset | 1); + ADVANCE_BATCH(); + } + + /* 3DSTATE_VS + * + * Disable vertex shader. + */ + { + BEGIN_BATCH(6); + OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_HS + * + * Disable the hull shader. + */ + { + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_TE + * + * Disable the tessellation engine. + */ + { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_DS + * + * Disable the domain shader. + * + * The length field is the packet's dword count minus two, like every + * other packet emitted here. + */ + { + BEGIN_BATCH(6); + OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_GS + * + * Disable the geometry shader. 
+ */ + { + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_STREAMOUT + * + * Disable streamout. + */ + { + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_CLIP + * + * Disable the clipper. + * + * The HiZ op emits a rectangle primitive, which requires clipping to + * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1 + * Section 1.3 "3D Primitives Overview": + * RECTLIST: + * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip + * Mode should be set to a value other than CLIPMODE_NORMAL. + * + * Also disable perspective divide. This doesn't change the clipper's + * output, but does spare a few electrons. + */ + { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_SF + * + * Disable ViewportTransformEnable (dw1.1) + * + * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D + * Primitives Overview": + * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the + * use of screen- space coordinates). + * + * A solid rectangle must be rendered, so set FrontFaceFillMode (dw1.6:5) + * and BackFaceFillMode (dw1.4:3) to SOLID(0). + * + * From the Sandy Bridge PRM, Volume 2, Part 1, Section + * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode: + * SOLID: Any triangle or rectangle object found to be front-facing + * is rendered as a solid object. This setting is required when + * (rendering rectangle (RECTLIST) objects. 
+ */ + { + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2)); + OUT_BATCH(depth_format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_SBE */ + { + BEGIN_BATCH(14); + OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2)); + OUT_BATCH((1 - 1) << GEN7_SBE_NUM_OUTPUTS_SHIFT | /* only position */ + 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | + 0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT); + for (int i = 0; i < 12; ++i) + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_WM + * + * Disable PS thread dispatch (dw1.29) and enable the HiZ op. + */ + { + uint32_t dw1 = 0; + + switch (op) { + case GEN6_HIZ_OP_DEPTH_CLEAR: + assert(!"not implemented"); + dw1 |= GEN7_WM_DEPTH_CLEAR; + break; + case GEN6_HIZ_OP_DEPTH_RESOLVE: + dw1 |= GEN7_WM_DEPTH_RESOLVE; + break; + case GEN6_HIZ_OP_HIZ_RESOLVE: + dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE; + break; + default: + assert(0); + break; + } + + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2)); + OUT_BATCH(dw1); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_PS + * + * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite + * that, thread dispatch info must still be specified. + * - Maximum Number of Threads (dw4.24:31) must be nonzero, as the BSpec + * states that the valid range for this field is [0x3, 0x2f]. + * - A dispatch mode must be given; that is, at least one of the + * "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was + * discovered through simulator error messages. 
+ */ + { + BEGIN_BATCH(8); + OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH((brw->max_wm_threads - 1) << GEN7_PS_MAX_THREADS_SHIFT); + OUT_BATCH(GEN7_PS_32_DISPATCH_ENABLE); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_DEPTH_BUFFER */ + { + uint32_t width = mt->level[level].width; + uint32_t height = mt->level[level].height; + + uint32_t tile_x; + uint32_t tile_y; + uint32_t offset; + { + /* Construct a dummy renderbuffer just to extract tile offsets. */ + struct intel_renderbuffer rb; + rb.mt = mt; + rb.mt_level = level; + rb.mt_layer = layer; + intel_renderbuffer_set_draw_offset(&rb); + offset = intel_renderbuffer_tile_offsets(&rb, &tile_x, &tile_y); + } + + intel_emit_depth_stall_flushes(intel); + + BEGIN_BATCH(7); + OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); + OUT_BATCH(((mt->region->pitch * mt->region->cpp) - 1) | + depth_format << 18 | + 1 << 22 | /* hiz enable */ + 1 << 28 | /* depth write */ + BRW_SURFACE_2D << 29); + OUT_RELOC(mt->region->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + offset); + OUT_BATCH((width + tile_x - 1) << 4 | + (height + tile_y - 1) << 18); + OUT_BATCH(0); + OUT_BATCH(tile_x | + tile_y << 16); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_HIER_DEPTH_BUFFER */ + { + struct intel_region *hiz_region = mt->hiz_mt->region; + + BEGIN_BATCH(3); + OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); + OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1); + OUT_RELOC(hiz_region->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_STENCIL_BUFFER */ + { + BEGIN_BATCH(3); + OUT_BATCH((GEN7_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_CLEAR_PARAMS + * + * From the BSpec, Volume 2a.11 Windower, Section 1.5.6.3.2 + * 3DSTATE_CLEAR_PARAMS: + * [DevIVB] 3DSTATE_CLEAR_PARAMS must always be programmed in the along + * with the 
other Depth/Stencil state commands(i.e. 3DSTATE_DEPTH_BUFFER, + * 3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER). + */ + { + BEGIN_BATCH(3); + OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_DRAWING_RECTANGLE */ + { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(((mt->level[level].width - 1) & 0xffff) | + ((mt->level[level].height - 1) << 16)); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DPRIMITIVE */ + { + BEGIN_BATCH(7); + OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2)); + OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL | + _3DPRIM_RECTLIST); + OUT_BATCH(3); /* vertex count per instance */ + OUT_BATCH(0); + OUT_BATCH(1); /* instance count */ + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* See comments above at first invocation of intel_flush() in + * gen6_hiz_emit_batch_head(). + */ + intel_flush(ctx); + + /* Be safe. */ + brw->state.dirty.brw = ~0; + brw->state.dirty.cache = ~0; +} + +/** \copydoc gen6_resolve_hiz_slice() */ +void +gen7_resolve_hiz_slice(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + gen7_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_HIZ_RESOLVE); +} + +/** \copydoc gen6_resolve_depth_slice() */ +void +gen7_resolve_depth_slice(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + gen7_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE); +} diff --git a/src/mesa/drivers/dri/i965/gen7_hiz.h b/src/mesa/drivers/dri/i965/gen7_hiz.h new file mode 100644 index 0000000..b89ffb0 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen7_hiz.h @@ -0,0 +1,43 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without 
restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include <stdint.h> + +struct intel_context; +struct intel_mipmap_tree; + +/** \copydoc gen6_resolve_hiz_slice() */ +void +gen7_resolve_hiz_slice(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer); + +/** \copydoc gen6_resolve_depth_slice() */ +void +gen7_resolve_depth_slice(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer); diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index da7ef81..b215af2 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -149,8 +149,7 @@ const struct brw_tracked_state gen7_sbe_state = { _NEW_PROGRAM | _NEW_TRANSFORM), .brw = (BRW_NEW_CONTEXT | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_HIZ), + BRW_NEW_FRAGMENT_PROGRAM), .cache = CACHE_NEW_VS_PROG }, .emit = upload_sbe_state, @@ -166,17 +165,8 @@ upload_sf_state(struct brw_context *brw) /* _NEW_BUFFERS */ bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; - dw1 = 
GEN6_SF_STATISTICS_ENABLE; - - /* Enable viewport transform only if no HiZ operation is progress - * - * From page 11 of the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D - * Primitives Overview": - * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the - * use of screen- space coordinates). - */ - if (!brw->hiz.op) - dw1 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE; + dw1 = GEN6_SF_STATISTICS_ENABLE | + GEN6_SF_VIEWPORT_TRANSFORM_ENABLE; /* _NEW_BUFFERS */ dw1 |= (brw_depthbuffer_format(brw) << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT); @@ -310,8 +300,7 @@ const struct brw_tracked_state gen7_sf_state = { _NEW_SCISSOR | _NEW_BUFFERS | _NEW_POINT), - .brw = (BRW_NEW_CONTEXT | - BRW_NEW_HIZ), + .brw = BRW_NEW_CONTEXT, .cache = CACHE_NEW_VS_PROG }, .emit = upload_sf_state, diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 32222f9..870590f 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -49,23 +49,6 @@ upload_wm_state(struct brw_context *brw) dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0; dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5; - switch (brw->hiz.op) { - case BRW_HIZ_OP_NONE: - break; - case BRW_HIZ_OP_DEPTH_CLEAR: - dw1 |= GEN7_WM_DEPTH_CLEAR; - break; - case BRW_HIZ_OP_DEPTH_RESOLVE: - dw1 |= GEN7_WM_DEPTH_RESOLVE; - break; - case BRW_HIZ_OP_HIZ_RESOLVE: - dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE; - break; - default: - assert(0); - break; - } - /* _NEW_LINE */ if (ctx->Line.StippleFlag) dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE; @@ -106,7 +89,6 @@ const struct brw_tracked_state gen7_wm_state = { .mesa = (_NEW_LINE | _NEW_LIGHT | _NEW_POLYGON | _NEW_COLOR | _NEW_BUFFERS), .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_HIZ | BRW_NEW_BATCH), .cache = 0, }, diff --git a/src/mesa/drivers/dri/i965/junk b/src/mesa/drivers/dri/i965/junk new file mode 100644 index 0000000..e69de29 -- cgit v1.1