diff options
author | Chad Versace <chad.versace@linux.intel.com> | 2011-11-22 10:52:29 -0800 |
---|---|---|
committer | Chad Versace <chad.versace@linux.intel.com> | 2011-11-22 10:52:29 -0800 |
commit | 4ce635c871d00e442efb2b265562685d7edd44ae (patch) | |
tree | b20aebc5d0924a916b92b0b319dc3522fca06799 | |
parent | 1f3c5eae5c4be582e50c2d4d7950424d86059c45 (diff) | |
parent | e5411d8fdc6a7dda18d82746b84197ef83ee0a13 (diff) | |
download | external_mesa3d-4ce635c871d00e442efb2b265562685d7edd44ae.zip external_mesa3d-4ce635c871d00e442efb2b265562685d7edd44ae.tar.gz external_mesa3d-4ce635c871d00e442efb2b265562685d7edd44ae.tar.bz2 |
Merge branch 'hiz' of ssh://people.freedesktop.org/~chadversary/mesa
42 files changed, 2130 insertions, 792 deletions
diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 86c8051..d29f979 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -885,13 +885,6 @@ i830_is_hiz_depth_format(struct intel_context *intel, gl_format format) return false; } -static void -i830_hiz_resolve_noop(struct intel_context *intel, - struct intel_region *region) -{ - /* empty */ -} - void i830InitVtbl(struct i830_context *i830) { @@ -910,6 +903,4 @@ i830InitVtbl(struct i830_context *i830) i830->intel.vtbl.invalidate_state = i830_invalidate_state; i830->intel.vtbl.render_target_supported = i830_render_target_supported; i830->intel.vtbl.is_hiz_depth_format = i830_is_hiz_depth_format; - i830->intel.vtbl.hiz_resolve_depthbuffer = i830_hiz_resolve_noop; - i830->intel.vtbl.hiz_resolve_hizbuffer = i830_hiz_resolve_noop; } diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index fa02dfa..072a692 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -853,13 +853,6 @@ i915_is_hiz_depth_format(struct intel_context *intel, } static void -i915_hiz_resolve_noop(struct intel_context *intel, - struct intel_region *region) -{ - /* empty */ -} - -static void i915_invalidate_state(struct intel_context *intel, GLuint new_state) { struct gl_context *ctx = &intel->ctx; @@ -887,6 +880,4 @@ i915InitVtbl(struct i915_context *i915) i915->intel.vtbl.invalidate_state = i915_invalidate_state; i915->intel.vtbl.render_target_supported = i915_render_target_supported; i915->intel.vtbl.is_hiz_depth_format = i915_is_hiz_depth_format; - i915->intel.vtbl.hiz_resolve_depthbuffer = i915_hiz_resolve_noop; - i915->intel.vtbl.hiz_resolve_hizbuffer = i915_hiz_resolve_noop; } diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 1b9ca6f..cd6a8f4 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ 
b/src/mesa/drivers/dri/i965/Makefile.sources @@ -15,6 +15,7 @@ i965_C_SOURCES := \ intel_fbo.c \ intel_mipmap_tree.c \ intel_regions.c \ + intel_resolve_map.c \ intel_screen.c \ intel_span.c \ intel_pixel.c \ diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 9b506a6..531ce5b 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -33,11 +33,23 @@ #include "main/imports.h" #include "main/macros.h" #include "main/simple_list.h" + +#include "vbo/vbo_context.h" + #include "brw_context.h" #include "brw_defines.h" #include "brw_draw.h" #include "brw_state.h" + +#include "gen6_hiz.h" + +#include "intel_fbo.h" +#include "intel_mipmap_tree.h" +#include "intel_regions.h" #include "intel_span.h" +#include "intel_tex.h" +#include "intel_tex_obj.h" + #include "tnl/t_pipeline.h" #include "glsl/ralloc.h" @@ -45,12 +57,66 @@ * Mesa's Driver Functions ***************************************/ +/** + * \brief Prepare for entry into glBegin/glEnd block. + * + * Resolve buffers before entering a glBegin/glEnd block. This is + * necessary to prevent recursive calls to FLUSH_VERTICES. + * + * This resolves the depth buffer of each enabled depth texture and the HiZ + * buffer of the attached depth renderbuffer. + * + * Details + * ------- + * When vertices are queued during a glBegin/glEnd block, those vertices must + * be drawn before any rendering state changes. To ensure this, Mesa calls + * FLUSH_VERTICES as a prehook to such state changes. Therefore, + * FLUSH_VERTICES itself cannot change rendering state without falling into a + * recursive trap. + * + * This precludes meta-ops, namely buffer resolves, from occurring while any + * vertices are queued. 
To prevent that situation, we resolve some buffers on + * entering a glBegin/glEnd block. + * + * \see brwCleanupExecEnd() + */ +static void brwPrepareExecBegin(struct gl_context *ctx) +{ + struct brw_context *brw = brw_context(ctx); + struct intel_context *intel = &brw->intel; + struct intel_renderbuffer *draw_irb; + struct intel_texture_object *tex_obj; + + if (!intel->has_hiz) { + /* The context uses no feature that requires buffer resolves. */ + return; + } + + /* Resolve each enabled texture. */ + for (int i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { + if (!ctx->Texture.Unit[i]._ReallyEnabled) + continue; + tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current); + if (!tex_obj || !tex_obj->mt) + continue; + intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt); + } + + /* Resolve the attached depth buffer. */ + draw_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); + if (draw_irb) { + intel_renderbuffer_resolve_hiz(intel, draw_irb); + } +} + static void brwInitDriverFunctions( struct dd_function_table *functions ) { intelInitDriverFunctions( functions ); brwInitFragProgFuncs( functions ); brw_init_queryobj_functions(functions); + + functions->PrepareExecBegin = brwPrepareExecBegin; } bool diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index e5d2d14..ec05fb7 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -144,6 +144,7 @@ enum brw_state_id { BRW_STATE_VS_CONSTBUF, BRW_STATE_PROGRAM_CACHE, BRW_STATE_STATE_BASE_ADDRESS, + BRW_STATE_HIZ, }; #define BRW_NEW_URB_FENCE (1 << BRW_STATE_URB_FENCE) @@ -172,6 +173,7 @@ enum brw_state_id { #define BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF) #define BRW_NEW_PROGRAM_CACHE (1 << BRW_STATE_PROGRAM_CACHE) #define BRW_NEW_STATE_BASE_ADDRESS (1 << BRW_STATE_STATE_BASE_ADDRESS) +#define BRW_NEW_HIZ (1 << BRW_STATE_HIZ) struct brw_state_flags { /** State update flags signalled by mesa internals */ @@
-899,6 +901,41 @@ struct brw_context enum state_struct_type type; } *state_batch_list; int state_batch_count; + + /** + * \brief State needed to execute HiZ meta-ops + * + * All fields except \c op are initialized by gen6_hiz_init(). + */ + struct brw_hiz_state { + /** + * \brief Indicates which HiZ operation is in progress. + * + * See the following sections of the Sandy Bridge PRM, Volume 1, Part2: + * - 7.5.3.1 Depth Buffer Clear + * - 7.5.3.2 Depth Buffer Resolve + * - 7.5.3.3 Hierarchical Depth Buffer Resolve + */ + enum brw_hiz_op { + BRW_HIZ_OP_NONE = 0, + BRW_HIZ_OP_DEPTH_CLEAR, + BRW_HIZ_OP_DEPTH_RESOLVE, + BRW_HIZ_OP_HIZ_RESOLVE, + } op; + + /** \brief Shader state */ + struct { + GLuint program; + GLuint position_vbo; + GLint position_location; + } shader; + + /** \brief VAO for the rectangle primitive's vertices. */ + GLuint vao; + + GLuint fbo; + struct gl_renderbuffer *depth_rb; + } hiz; }; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 746c89f..bb79bfb 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -458,10 +458,11 @@ #define BRW_SURFACE_MIN_LOD_MASK INTEL_MASK(31, 28) /* Surface state DW5 */ -#define BRW_SURFACE_X_OFFSET_SHIFT 25 -#define BRW_SURFACE_X_OFFSET_MASK INTEL_MASK(31, 25) -#define BRW_SURFACE_Y_OFFSET_SHIFT 20 -#define BRW_SURFACE_Y_OFFSET_MASK INTEL_MASK(23, 20) +#define BRW_SURFACE_X_OFFSET_SHIFT 25 +#define BRW_SURFACE_X_OFFSET_MASK INTEL_MASK(31, 25) +#define BRW_SURFACE_VERTICAL_ALIGN_ENABLE (1 << 24) +#define BRW_SURFACE_Y_OFFSET_SHIFT 20 +#define BRW_SURFACE_Y_OFFSET_MASK INTEL_MASK(23, 20) #define BRW_TEXCOORDMODE_WRAP 0 #define BRW_TEXCOORDMODE_MIRROR 1 diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 1571fb7..bf3c95c 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -44,6 +44,9 @@ #include "brw_state.h" #include 
"intel_batchbuffer.h" +#include "intel_fbo.h" +#include "intel_mipmap_tree.h" +#include "intel_regions.h" #define FILE_DEBUG_FLAG DEBUG_PRIMS @@ -117,10 +120,17 @@ static void brw_set_prim(struct brw_context *brw, static void gen6_set_prim(struct brw_context *brw, const struct _mesa_prim *prim) { - uint32_t hw_prim = prim_to_hw_prim[prim->mode]; + uint32_t hw_prim; DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode)); + if (brw->hiz.op) { + assert(prim->mode == GL_TRIANGLES); + hw_prim = _3DPRIM_RECTLIST; + } else { + hw_prim = prim_to_hw_prim[prim->mode]; + } + if (hw_prim != brw->primitive) { brw->primitive = hw_prim; brw->state.dirty.brw |= BRW_NEW_PRIMITIVE; @@ -280,6 +290,93 @@ static void brw_merge_inputs( struct brw_context *brw, brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS; } +/* + * \brief Resolve buffers before drawing. + * + * Resolve the depth buffer's HiZ buffer and resolve the depth buffer of each + * enabled depth texture. + * + * (In the future, this will also perform MSAA resolves). + */ +static void +brw_predraw_resolve_buffers(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + struct intel_renderbuffer *depth_irb; + struct intel_texture_object *tex_obj; + bool did_resolve = false; + + /* Avoid recursive HiZ op. */ + if (brw->hiz.op) { + return; + } + + /* Resolve the depth buffer's HiZ buffer. */ + depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); + if (depth_irb && depth_irb->mt) { + did_resolve |= intel_renderbuffer_resolve_hiz(intel, depth_irb); + } + + /* Resolve depth buffer of each enabled depth texture. 
*/ + for (int i = 0; i < BRW_MAX_TEX_UNIT; i++) { + if (!ctx->Texture.Unit[i]._ReallyEnabled) + continue; + tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current); + if (!tex_obj || !tex_obj->mt) + continue; + did_resolve |= intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt); + } + + if (did_resolve) { + /* Call vbo_bind_arrays() to synchronize the vbo module's vertex + * attributes to the gl_context's. + * + * Details + * ------- + * The vbo module tracks vertex attributes separately from the + * gl_context. Specifically, the vbo module maintains vertex attributes + * in vbo_exec_context::array::inputs, which is synchronized with + * gl_context::Array::ArrayObj::VertexAttrib by vbo_bind_arrays(). + * vbo_draw_arrays() calls vbo_bind_arrays() to perform the + * synchronization before calling the real draw call, + * vbo_context::draw_arrays. + * + * At this point (after performing a resolve meta-op but before calling + * vbo_bind_arrays), the gl_context's vertex attributes have been + * restored to their original state (that is, their state before the + * meta-op began), but the vbo module's vertex attributes are those used + * in the last meta-op. Therefore we must manually synchronize the two with + * vbo_bind_arrays() before continuing with the original draw command. + */ + _mesa_update_state(ctx); + vbo_bind_arrays(ctx); + _mesa_update_state(ctx); + } +} + +/** + * \brief Call this after drawing to mark which buffers need resolving + * + * If the depth buffer was written to and if it has an accompanying HiZ + * buffer, then mark that it needs a depth resolve. + * + * (In the future, this will also mark needed MSAA resolves).
+ */ +static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->intel.ctx; + struct gl_framebuffer *fb = ctx->DrawBuffer; + struct intel_renderbuffer *depth_irb = + intel_get_renderbuffer(fb, BUFFER_DEPTH); + + if (depth_irb && + ctx->Depth.Mask && + !brw->hiz.op) { + intel_renderbuffer_set_needs_depth_resolve(depth_irb); + } +} + /* May fail if out of video memory for texture or vbo upload, or on * fallback conditions. */ @@ -309,6 +406,11 @@ static bool brw_try_draw_prims( struct gl_context *ctx, */ brw_validate_textures( brw ); + /* Resolves must occur after updating state and finalizing textures but + * before setting up any hardware state for this draw call. + */ + brw_predraw_resolve_buffers(brw); + /* Bind all inputs, derive varying and size information: */ brw_merge_inputs( brw, arrays ); @@ -403,6 +505,7 @@ retry: out: brw_state_cache_check_size(brw); + brw_postdraw_set_buffers_need_resolve(brw); return retval; } diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 514c990..17da460 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -33,6 +33,7 @@ #include "intel_batchbuffer.h" #include "intel_fbo.h" +#include "intel_mipmap_tree.h" #include "intel_regions.h" #include "brw_context.h" @@ -204,9 +205,15 @@ static void emit_depthbuffer(struct brw_context *brw) /* _NEW_BUFFERS */ struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH); struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL); - struct intel_region *hiz_region = depth_irb ? 
depth_irb->hiz_region : NULL; + struct intel_region *hiz_region = NULL; unsigned int len; + if (depth_irb && + depth_irb->mt && + depth_irb->mt->hiz_mt) { + hiz_region = depth_irb->mt->hiz_mt->region; + } + /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both * non-pipelined state that will need the PIPE_CONTROL workaround. */ @@ -272,6 +279,8 @@ static void emit_depthbuffer(struct brw_context *brw) * [DevGT]: This field must be set to the same value (enabled or * disabled) as Hierarchical Depth Buffer Enable */ + struct intel_region *region = stencil_irb->mt->region; + assert(intel->has_separate_stencil); assert(stencil_irb->Base.Format == MESA_FORMAT_S8); @@ -283,8 +292,8 @@ static void emit_depthbuffer(struct brw_context *brw) (BRW_TILEWALK_YMAJOR << 26) | (BRW_SURFACE_2D << 29)); OUT_BATCH(0); - OUT_BATCH(((stencil_irb->region->width - 1) << 6) | - (2 * stencil_irb->region->height - 1) << 19); + OUT_BATCH(((region->width - 1) << 6) | + (2 * region->height - 1) << 19); OUT_BATCH(0); OUT_BATCH(0); @@ -294,7 +303,7 @@ static void emit_depthbuffer(struct brw_context *brw) ADVANCE_BATCH(); } else { - struct intel_region *region = depth_irb->region; + struct intel_region *region = depth_irb->mt->region; unsigned int format; uint32_t tile_x, tile_y, offset; @@ -379,10 +388,11 @@ static void emit_depthbuffer(struct brw_context *brw) /* Emit stencil buffer. 
*/ if (stencil_irb) { + struct intel_region *region = stencil_irb->mt->region; BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); - OUT_BATCH(stencil_irb->region->pitch * stencil_irb->region->cpp - 1); - OUT_RELOC(stencil_irb->region->bo, + OUT_BATCH(region->pitch * region->cpp - 1); + OUT_RELOC(region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 730a0e8..bd32815 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -368,6 +368,7 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE), DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE), DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS), + DEFINE_BIT(BRW_NEW_HIZ), {0, 0, 0} }; diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index d77bf4d..eaea49b 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -41,39 +41,31 @@ static void brw_miptree_layout_texture_array(struct intel_context *intel, - struct intel_mipmap_tree *mt, - int slices) + struct intel_mipmap_tree *mt) { - GLuint align_w; - GLuint align_h; GLuint level; GLuint qpitch = 0; int h0, h1, q; - intel_get_texture_alignment_unit(mt->format, &align_w, &align_h); - - h0 = ALIGN(mt->height0, align_h); - h1 = ALIGN(minify(mt->height0), align_h); - qpitch = (h0 + h1 + (intel->gen >= 7 ? 12 : 11) * align_h); + h0 = ALIGN(mt->height0, mt->align_h); + h1 = ALIGN(minify(mt->height0), mt->align_h); + qpitch = (h0 + h1 + (intel->gen >= 7 ? 
12 : 11) * mt->align_h); if (mt->compressed) qpitch /= 4; - i945_miptree_layout_2d(mt, slices); + i945_miptree_layout_2d(mt); for (level = mt->first_level; level <= mt->last_level; level++) { - for (q = 0; q < slices; q++) { + for (q = 0; q < mt->depth0; q++) { intel_miptree_set_image_offset(mt, level, q, 0, q * qpitch); } } - mt->total_height = qpitch * slices; + mt->total_height = qpitch * mt->depth0; } void brw_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree *mt) { - /* XXX: these vary depending on image format: */ - /* GLint align_w = 4; */ - switch (mt->target) { case GL_TEXTURE_CUBE_MAP: if (intel->gen >= 5) { @@ -82,7 +74,7 @@ brw_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree *mt) * pitch of qpitch rows, where qpitch is defined by the equation given * in Volume 1 of the BSpec. */ - brw_miptree_layout_texture_array(intel, mt, 6); + brw_miptree_layout_texture_array(intel, mt); break; } /* FALLTHROUGH */ @@ -94,18 +86,15 @@ brw_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree *mt) GLuint pack_x_pitch, pack_x_nr; GLuint pack_y_pitch; GLuint level; - GLuint align_h = 2; - GLuint align_w = 4; mt->total_height = 0; - intel_get_texture_alignment_unit(mt->format, &align_w, &align_h); if (mt->compressed) { - mt->total_width = ALIGN(width, align_w); + mt->total_width = ALIGN(width, mt->align_w); pack_y_pitch = (height + 3) / 4; } else { mt->total_width = mt->width0; - pack_y_pitch = ALIGN(mt->height0, align_h); + pack_y_pitch = ALIGN(mt->height0, mt->align_h); } pack_x_pitch = width; @@ -117,7 +106,7 @@ brw_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree *mt) GLint y = 0; GLint q, j; - intel_miptree_set_level_info(mt, level, nr_images, + intel_miptree_set_level_info(mt, level, 0, mt->total_height, width, height, depth); @@ -140,8 +129,8 @@ brw_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree *mt) if (mt->compressed) { pack_y_pitch = (height + 3) / 4; - if 
(pack_x_pitch > ALIGN(width, align_w)) { - pack_x_pitch = ALIGN(width, align_w); + if (pack_x_pitch > ALIGN(width, mt->align_w)) { + pack_x_pitch = ALIGN(width, mt->align_w); pack_x_nr <<= 1; } } else { @@ -153,7 +142,7 @@ brw_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree *mt) if (pack_y_pitch > 2) { pack_y_pitch >>= 1; - pack_y_pitch = ALIGN(pack_y_pitch, align_h); + pack_y_pitch = ALIGN(pack_y_pitch, mt->align_h); } } @@ -170,11 +159,11 @@ brw_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree *mt) case GL_TEXTURE_2D_ARRAY: case GL_TEXTURE_1D_ARRAY: - brw_miptree_layout_texture_array(intel, mt, mt->depth0); + brw_miptree_layout_texture_array(intel, mt); break; default: - i945_miptree_layout_2d(mt, 1); + i945_miptree_layout_2d(mt); break; } DBG("%s: %dx%dx%d\n", __FUNCTION__, diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 7c40f27..dd2e05a 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -95,7 +95,7 @@ brw_update_draw_buffer(struct intel_context *intel) { struct gl_context *ctx = &intel->ctx; struct gl_framebuffer *fb = ctx->DrawBuffer; - struct intel_renderbuffer *irbDepth = NULL, *irbStencil = NULL; + struct intel_renderbuffer *irbStencil = NULL; bool fb_has_hiz = intel_framebuffer_has_hiz(fb); if (!fb) { @@ -103,27 +103,7 @@ brw_update_draw_buffer(struct intel_context *intel) return; } - /* - * If intel_context is using separate stencil, but the depth attachment - * (gl_framebuffer.Attachment[BUFFER_DEPTH]) has a packed depth/stencil - * format, then we must install the real depth buffer at fb->_DepthBuffer - * and set fb->_DepthBuffer->Wrapped before calling _mesa_update_framebuffer. - * Otherwise, _mesa_update_framebuffer will create and install a swras - * depth wrapper instead. - * - * Ditto for stencil. 
- */ - irbDepth = intel_get_renderbuffer(fb, BUFFER_DEPTH); - if (irbDepth && irbDepth->Base.Format == MESA_FORMAT_X8_Z24) { - _mesa_reference_renderbuffer(&fb->_DepthBuffer, &irbDepth->Base); - irbDepth->Base.Wrapped = fb->Attachment[BUFFER_DEPTH].Renderbuffer; - } - irbStencil = intel_get_renderbuffer(fb, BUFFER_STENCIL); - if (irbStencil && irbStencil->Base.Format == MESA_FORMAT_S8) { - _mesa_reference_renderbuffer(&fb->_StencilBuffer, &irbStencil->Base); - irbStencil->Base.Wrapped = fb->Attachment[BUFFER_STENCIL].Renderbuffer; - } /* Do this here, not core Mesa, since this function is called from * many places within the driver. @@ -146,7 +126,7 @@ brw_update_draw_buffer(struct intel_context *intel) /* Check some stencil invariants. These should probably be in * emit_depthbuffer(). */ - if (irbStencil && irbStencil->region) { + if (irbStencil && irbStencil->mt) { if (!intel->has_separate_stencil) assert(irbStencil->Base.Format == MESA_FORMAT_S8_Z24); if (fb_has_hiz || intel->must_use_separate_stencil) @@ -238,12 +218,6 @@ static bool brw_is_hiz_depth_format(struct intel_context *intel, return intel->has_hiz && (format == MESA_FORMAT_X8_Z24); } -static void brw_hiz_resolve_noop(struct intel_context *intel, - struct intel_region *depth_region) -{ - /* empty */ -} - void brwInitVtbl( struct brw_context *brw ) { brw->intel.vtbl.check_vertex_size = 0; @@ -262,11 +236,8 @@ void brwInitVtbl( struct brw_context *brw ) brw->intel.vtbl.is_hiz_depth_format = brw_is_hiz_depth_format; if (brw->intel.has_hiz) { - brw->intel.vtbl.hiz_resolve_hizbuffer = gen6_hiz_resolve_hizbuffer; - brw->intel.vtbl.hiz_resolve_depthbuffer = gen6_hiz_resolve_depthbuffer; - } else { - brw->intel.vtbl.hiz_resolve_hizbuffer = brw_hiz_resolve_noop; - brw->intel.vtbl.hiz_resolve_depthbuffer = brw_hiz_resolve_noop; + brw->intel.vtbl.resolve_depth_slice = gen6_resolve_depth_slice; + brw->intel.vtbl.resolve_hiz_slice = gen6_resolve_hiz_slice; } if (brw->intel.gen >= 7) { diff --git 
a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 0cc6201..984a7bc 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -262,6 +262,7 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit ) struct brw_context *brw = brw_context(ctx); struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; struct intel_texture_object *intelObj = intel_texture_object(tObj); + struct intel_mipmap_tree *mt = intelObj->mt; struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel]; struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); const GLuint surf_index = SURF_INDEX_TEXTURE(unit); @@ -294,7 +295,8 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit ) BRW_SURFACE_PITCH_SHIFT); surf[4] = 0; - surf[5] = 0; + + surf[5] = (mt->align_h == 4) ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0; /* Emit relocation to surface contents */ drm_intel_bo_emit_reloc(brw->intel.batch.bo, @@ -447,7 +449,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; struct intel_renderbuffer *irb = intel_renderbuffer(rb); - struct intel_region *region = irb->region; + struct intel_mipmap_tree *mt = irb->mt; + struct intel_region *region = irb->mt->region; uint32_t *surf; uint32_t tile_x, tile_y; uint32_t format = 0; @@ -509,7 +512,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, assert(tile_x % 4 == 0); assert(tile_y % 2 == 0); surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT | - (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT); + (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT | + (mt->align_h == 4 ? 
BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0)); if (intel->gen < 6) { /* _NEW_COLOR */ diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index b3bb8ae..d2a5f75 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -67,6 +67,23 @@ upload_clip_state(struct brw_context *brw) GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE; } + if (brw->hiz.op) { + /* HiZ operations emit a rectangle primitive, which requires clipping to + * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1 + * Section 1.3 3D Primitives Overview: + * RECTLIST: + * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip + * Mode should be set to a value other than CLIPMODE_NORMAL. + */ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + return; + } + if (!ctx->Transform.DepthClamp) depth_clamp = GEN6_CLIP_Z_TEST; @@ -107,7 +124,8 @@ const struct brw_tracked_state gen6_clip_state = { .dirty = { .mesa = _NEW_TRANSFORM | _NEW_LIGHT, .brw = (BRW_NEW_CONTEXT | - BRW_NEW_FRAGMENT_PROGRAM), + BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_HIZ), .cache = 0 }, .emit = upload_clip_state, diff --git a/src/mesa/drivers/dri/i965/gen6_depthstencil.c b/src/mesa/drivers/dri/i965/gen6_depthstencil.c index 72e8687..eec1bf6 100644 --- a/src/mesa/drivers/dri/i965/gen6_depthstencil.c +++ b/src/mesa/drivers/dri/i965/gen6_depthstencil.c @@ -77,8 +77,12 @@ gen6_upload_depth_stencil_state(struct brw_context *brw) } /* _NEW_DEPTH */ - if (ctx->Depth.Test) { - ds->ds2.depth_test_enable = 1; + if (ctx->Depth.Test || brw->hiz.op) { + assert(brw->hiz.op != BRW_HIZ_OP_DEPTH_RESOLVE || ctx->Depth.Test); + assert(brw->hiz.op != BRW_HIZ_OP_HIZ_RESOLVE || !ctx->Depth.Test); + assert(brw->hiz.op != BRW_HIZ_OP_DEPTH_CLEAR || !ctx->Depth.Test); + + ds->ds2.depth_test_enable = ctx->Depth.Test; ds->ds2.depth_test_func = 
intel_translate_compare_func(ctx->Depth.Func); ds->ds2.depth_write_enable = ctx->Depth.Mask; } @@ -89,7 +93,8 @@ gen6_upload_depth_stencil_state(struct brw_context *brw) const struct brw_tracked_state gen6_depth_stencil_state = { .dirty = { .mesa = _NEW_DEPTH | _NEW_STENCIL, - .brw = BRW_NEW_BATCH, + .brw = (BRW_NEW_BATCH | + BRW_NEW_HIZ), .cache = 0, }, .emit = gen6_upload_depth_stencil_state, diff --git a/src/mesa/drivers/dri/i965/gen6_hiz.c b/src/mesa/drivers/dri/i965/gen6_hiz.c index fc6344b..e282511 100644 --- a/src/mesa/drivers/dri/i965/gen6_hiz.c +++ b/src/mesa/drivers/dri/i965/gen6_hiz.c @@ -25,16 +25,316 @@ #include <assert.h> +#include "mesa/drivers/common/meta.h" + +#include "mesa/main/arrayobj.h" +#include "mesa/main/bufferobj.h" +#include "mesa/main/depth.h" +#include "mesa/main/enable.h" +#include "mesa/main/fbobject.h" +#include "mesa/main/framebuffer.h" +#include "mesa/main/get.h" +#include "mesa/main/renderbuffer.h" +#include "mesa/main/shaderapi.h" +#include "mesa/main/varray.h" + +#include "intel_fbo.h" +#include "intel_mipmap_tree.h" +#include "intel_regions.h" +#include "intel_tex.h" + +#include "brw_context.h" +#include "brw_defines.h" + +static const uint32_t gen6_hiz_meta_save = + + /* Disable alpha, depth, and stencil test. + * + * See the following sections of the Sandy Bridge PRM, Volume 1, Part2: + * - 7.5.3.1 Depth Buffer Clear + * - 7.5.3.2 Depth Buffer Resolve + * - 7.5.3.3 Hierarchical Depth Buffer Resolve + */ + MESA_META_ALPHA_TEST | + MESA_META_DEPTH_TEST | + MESA_META_STENCIL_TEST | + + /* Disable viewport mapping. + * + * From page 11 of the Sandy Bridge PRM, Volume 2, Part 1, Section 1.3 + * 3D Primitives Overview: + * RECTLIST: + * Viewport Mapping must be DISABLED (as is typical with the use of + * screen- space coordinates). + * + * We must also manually disable 3DSTATE_SF.Viewport_Transform_Enable. + */ + MESA_META_VIEWPORT | + + /* Disable clipping. 
+ * + * From page 11 of the Sandy Bridge PRM, Volume 2, Part 1, Section 1.3 + * 3D Primitives Overview: + * Either the CLIP unit should be DISABLED, or the CLIP unit’s Clip + * Mode should be set to a value other than CLIPMODE_NORMAL. + */ + MESA_META_CLIP | + + /* Render a solid rectangle (set 3DSTATE_SF.FrontFace_Fill_Mode). + * + * From page 249 of the Sandy Bridge PRM, Volume 2, Part 1, Section + * 6.4.1.1 3DSTATE_SF, FrontFace_Fill_Mode: + * SOLID: Any triangle or rectangle object found to be front-facing + * is rendered as a solid object. This setting is required when + * rendering rectangle (RECTLIST) objects. + * Also see field BackFace_Fill_Mode. + * + * Note: MESA_META_RASTERIZATION also disables culling, but that is + * irrelevant. See 3DSTATE_SF.Cull_Mode. + */ + MESA_META_RASTERIZATION | + + /* Each HiZ operation uses a vertex shader and VAO. */ + MESA_META_SHADER | + MESA_META_VERTEX | + + /* Disable scissoring. + * + * Scissoring is disabled for resolves because a resolve operation + * should resolve the entire buffer. Scissoring is disabled for depth + * clears because, if we are performing a partial depth clear, then we + * specify the clear region with the RECTLIST vertices. + */ + MESA_META_SCISSOR | + + MESA_META_SELECT_FEEDBACK; + +/** + * Initialize static data needed for HiZ operations. + */ +static void +gen6_hiz_init(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->intel.ctx; + struct brw_hiz_state *hiz = &brw->hiz; + + if (hiz->fbo != 0) + return; + + /* Create depthbuffer. + * + * Until glRenderbufferStorage is called, the renderbuffer hash table + * maps the renderbuffer name to a dummy renderbuffer. We need the + * renderbuffer to be registered in the hash table so that framebuffer + * validation succeeds, so we hackishly allocate storage then immediately + * discard it.
+ */ + GLuint depth_rb_name; + _mesa_GenRenderbuffersEXT(1, &depth_rb_name); + _mesa_BindRenderbufferEXT(GL_RENDERBUFFER, depth_rb_name); + _mesa_RenderbufferStorageEXT(GL_RENDERBUFFER, GL_DEPTH_COMPONENT, 32, 32); + _mesa_reference_renderbuffer(&hiz->depth_rb, + _mesa_lookup_renderbuffer(ctx, depth_rb_name)); + intel_miptree_release(&((struct intel_renderbuffer*) hiz->depth_rb)->mt); + + /* Setup FBO. */ + _mesa_GenFramebuffersEXT(1, &hiz->fbo); + _mesa_BindFramebufferEXT(GL_DRAW_FRAMEBUFFER, hiz->fbo); + _mesa_FramebufferRenderbufferEXT(GL_DRAW_FRAMEBUFFER, + GL_DEPTH_ATTACHMENT, + GL_RENDERBUFFER, + hiz->depth_rb->Name); + + /* Compile vertex shader. */ + const char *vs_source = + "attribute vec4 position;\n" + "void main()\n" + "{\n" + " gl_Position = position;\n" + "}\n"; + GLuint vs = _mesa_CreateShaderObjectARB(GL_VERTEX_SHADER); + _mesa_ShaderSourceARB(vs, 1, &vs_source, NULL); + _mesa_CompileShaderARB(vs); + + /* Compile fragment shader. */ + const char *fs_source = "void main() {}"; + GLuint fs = _mesa_CreateShaderObjectARB(GL_FRAGMENT_SHADER); + _mesa_ShaderSourceARB(fs, 1, &fs_source, NULL); + _mesa_CompileShaderARB(fs); + + /* Link and use program. */ + hiz->shader.program = _mesa_CreateProgramObjectARB(); + _mesa_AttachShader(hiz->shader.program, vs); + _mesa_AttachShader(hiz->shader.program, fs); + _mesa_LinkProgramARB(hiz->shader.program); + _mesa_UseProgramObjectARB(hiz->shader.program); + + /* Create and bind VAO. */ + _mesa_GenVertexArrays(1, &hiz->vao); + _mesa_BindVertexArray(hiz->vao); + + /* Setup VBO for 'position'. 
*/ + hiz->shader.position_location = + _mesa_GetAttribLocationARB(hiz->shader.program, "position"); + _mesa_GenBuffersARB(1, &hiz->shader.position_vbo); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, hiz->shader.position_vbo); + _mesa_VertexAttribPointerARB(hiz->shader.position_location, + 2, /*components*/ + GL_FLOAT, + GL_FALSE, /*normalized?*/ + 0, /*stride*/ + NULL); + _mesa_EnableVertexAttribArrayARB(hiz->shader.position_location); + + /* Cleanup. */ + _mesa_DeleteShader(vs); + _mesa_DeleteShader(fs); +} + +/** + * Wrap \c brw->hiz.depth_rb around a miptree. + * + * \see gen6_hiz_teardown_depth_buffer() + */ +static void +gen6_hiz_setup_depth_buffer(struct brw_context *brw, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int layer) +{ + struct gl_renderbuffer *rb = brw->hiz.depth_rb; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + rb->Format = mt->format; + rb->_BaseFormat = _mesa_get_format_base_format(rb->Format); + rb->DataType = intel_mesa_format_to_rb_datatype(rb->Format); + rb->InternalFormat = rb->_BaseFormat; + rb->Width = mt->level[level].width; + rb->Height = mt->level[level].height; + + irb->mt_level = level; + irb->mt_layer = layer; + + intel_miptree_reference(&irb->mt, mt); + intel_renderbuffer_set_draw_offset(irb); +} + +/** + * Release the region from \c brw->hiz.depth_rb. + * + * \see gen6_hiz_setup_depth_buffer() + */ +static void +gen6_hiz_teardown_depth_buffer(struct gl_renderbuffer *rb) +{ + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + intel_miptree_release(&irb->mt); +} + +static void +gen6_resolve_slice(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int layer, + enum brw_hiz_op op) +{ + struct gl_context *ctx = &intel->ctx; + struct brw_context *brw = brw_context(ctx); + struct brw_hiz_state *hiz = &brw->hiz; + + /* Do not recurse. 
*/ + assert(!brw->hiz.op); + + assert(mt->hiz_mt != NULL); + assert(level >= mt->first_level); + assert(level <= mt->last_level); + assert(layer < mt->level[level].depth); + + /* Save state. */ + GLint save_drawbuffer; + GLint save_renderbuffer; + _mesa_meta_begin(ctx, gen6_hiz_meta_save); + _mesa_GetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &save_drawbuffer); + _mesa_GetIntegerv(GL_RENDERBUFFER_BINDING, &save_renderbuffer); + + /* Initialize context data for HiZ operations. */ + gen6_hiz_init(brw); + + /* Set depth state. */ + if (!ctx->Depth.Mask) { + /* This sets 3DSTATE_WM.Depth_Buffer_Write_Enable. */ + _mesa_DepthMask(GL_TRUE); + } + if (op == BRW_HIZ_OP_DEPTH_RESOLVE) { + _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_TRUE); + _mesa_DepthFunc(GL_NEVER); + } + + /* Setup FBO. */ + gen6_hiz_setup_depth_buffer(brw, mt, level, layer); + _mesa_BindFramebufferEXT(GL_DRAW_FRAMEBUFFER, hiz->fbo); + + + /* A rectangle primitive (3DPRIM_RECTLIST) consists of only three vertices. + * The vertices reside in screen space with DirectX coordinates (this is, + * (0, 0) is the upper left corner). + * + * v2 ------ implied + * | | + * | | + * v0 ----- v1 + */ + const int width = hiz->depth_rb->Width; + const int height = hiz->depth_rb->Height; + const GLfloat positions[] = { + 0, height, + width, height, + 0, 0, + }; + + /* Setup program and vertex attributes. */ + _mesa_UseProgramObjectARB(hiz->shader.program); + _mesa_BindVertexArray(hiz->vao); + _mesa_BindBufferARB(GL_ARRAY_BUFFER, hiz->shader.position_vbo); + _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(positions), positions, + GL_DYNAMIC_DRAW_ARB); + + /* Execute the HiZ operation. */ + brw->hiz.op = op; + brw->state.dirty.brw |= BRW_NEW_HIZ; + _mesa_DrawArrays(GL_TRIANGLES, 0, 3); + brw->state.dirty.brw |= BRW_NEW_HIZ; + brw->hiz.op = BRW_HIZ_OP_NONE; + + /* Restore state. + * + * The order in which state is restored is significant. 
The draw buffer + * used for the HiZ op has no stencil buffer, and glStencilFunc() clamps + * the stencil reference value to the range allowed by the draw buffer's + * number of stencil bits. So, the draw buffer binding must be restored + * before the stencil state, or else the stencil ref will be clamped to 0. + */ + gen6_hiz_teardown_depth_buffer(hiz->depth_rb); + _mesa_BindRenderbufferEXT(GL_RENDERBUFFER, save_renderbuffer); + _mesa_BindFramebufferEXT(GL_DRAW_FRAMEBUFFER, save_drawbuffer); + _mesa_meta_end(ctx); +} + void -gen6_hiz_resolve_depthbuffer(struct intel_context *intel, - struct intel_region *depth_region) +gen6_resolve_hiz_slice(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) { - assert("!stub"); + gen6_resolve_slice(intel, mt, level, layer, BRW_HIZ_OP_HIZ_RESOLVE); } + void -gen6_hiz_resolve_hizbuffer(struct intel_context *intel, - struct intel_region *depth_region) +gen6_resolve_depth_slice(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) { - assert("!stub"); + gen6_resolve_slice(intel, mt, level, layer, BRW_HIZ_OP_DEPTH_RESOLVE); } diff --git a/src/mesa/drivers/dri/i965/gen6_hiz.h b/src/mesa/drivers/dri/i965/gen6_hiz.h index 4611182..4929012 100644 --- a/src/mesa/drivers/dri/i965/gen6_hiz.h +++ b/src/mesa/drivers/dri/i965/gen6_hiz.h @@ -23,13 +23,19 @@ #pragma once +#include <stdint.h> + struct intel_context; -struct intel_region; +struct intel_mipmap_tree; void -gen6_hiz_resolve_depthbuffer(struct intel_context *intel, - struct intel_region *depth_region); +gen6_resolve_hiz_slice(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer); void -gen6_hiz_resolve_hizbuffer(struct intel_context *intel, - struct intel_region *depth_region); +gen6_resolve_depth_slice(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer); diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c 
b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 67119d8..4c4ff30 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -147,8 +147,19 @@ upload_sf_state(struct brw_context *brw) num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT | urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; - dw2 = GEN6_SF_VIEWPORT_TRANSFORM_ENABLE | - GEN6_SF_STATISTICS_ENABLE; + + dw2 = GEN6_SF_STATISTICS_ENABLE; + + /* Enable viewport transform only if no HiZ operation is progress + * + * From page 11 of the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D + * Primitives Overview": + * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the + * use of screen- space coordinates). + */ + if (!brw->hiz.op) + dw2 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE; + dw3 = 0; dw4 = 0; dw16 = 0; @@ -334,7 +345,8 @@ const struct brw_tracked_state gen6_sf_state = { _NEW_POINT | _NEW_TRANSFORM), .brw = (BRW_NEW_CONTEXT | - BRW_NEW_FRAGMENT_PROGRAM), + BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_HIZ), .cache = CACHE_NEW_VS_PROG }, .emit = upload_sf_state, diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 271a9ae..070220a 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -147,6 +147,23 @@ upload_wm_state(struct brw_context *brw) dw4 |= (brw->wm.prog_data->first_curbe_grf_16 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2); + switch (brw->hiz.op) { + case BRW_HIZ_OP_NONE: + break; + case BRW_HIZ_OP_DEPTH_CLEAR: + dw4 |= GEN6_WM_DEPTH_CLEAR; + break; + case BRW_HIZ_OP_DEPTH_RESOLVE: + dw4 |= GEN6_WM_DEPTH_RESOLVE; + break; + case BRW_HIZ_OP_HIZ_RESOLVE: + dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE; + break; + default: + assert(0); + break; + } + dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT; /* CACHE_NEW_WM_PROG */ @@ -215,7 +232,8 @@ const struct brw_tracked_state 
gen6_wm_state = { _NEW_POLYGON), .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_URB_FENCE | - BRW_NEW_BATCH), + BRW_NEW_BATCH | + BRW_NEW_HIZ), .cache = (CACHE_NEW_SAMPLER | CACHE_NEW_WM_PROG) }, diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c b/src/mesa/drivers/dri/i965/gen7_misc_state.c index 6a3c086..25e1b1d 100644 --- a/src/mesa/drivers/dri/i965/gen7_misc_state.c +++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c @@ -22,6 +22,7 @@ */ #include "intel_batchbuffer.h" +#include "intel_mipmap_tree.h" #include "intel_regions.h" #include "intel_fbo.h" #include "brw_context.h" @@ -38,7 +39,7 @@ gen7_depth_format(struct brw_context *brw) struct intel_region *region = NULL; if (drb) - region = drb->region; + region = drb->mt->region; else return BRW_DEPTHFORMAT_D32_FLOAT; @@ -78,7 +79,7 @@ static void emit_depthbuffer(struct brw_context *brw) if (srb == NULL) { dw1 |= (BRW_SURFACE_NULL << 29); } else { - struct intel_region *region = srb->region; + struct intel_region *region = srb->mt->region; /* _NEW_STENCIL: enable stencil buffer writes */ dw1 |= ((ctx->Stencil.WriteMask != 0) << 27); @@ -98,7 +99,7 @@ static void emit_depthbuffer(struct brw_context *brw) OUT_BATCH(0); ADVANCE_BATCH(); } else { - struct intel_region *region = drb->region; + struct intel_region *region = drb->mt->region; uint32_t tile_x, tile_y, offset; offset = intel_renderbuffer_tile_offsets(drb, &tile_x, &tile_y); @@ -140,8 +141,8 @@ static void emit_depthbuffer(struct brw_context *brw) } else { BEGIN_BATCH(3); OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2)); - OUT_BATCH(srb->region->pitch * srb->region->cpp - 1); - OUT_RELOC(srb->region->bo, + OUT_BATCH(srb->mt->region->pitch * srb->mt->region->cpp - 1); + OUT_RELOC(srb->mt->region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index f74198b..e1c3910 100644 --- 
a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -192,7 +192,7 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; struct intel_renderbuffer *irb = intel_renderbuffer(rb); - struct intel_region *region = irb->region; + struct intel_region *region = irb->mt->region; struct gen7_surface_state *surf; uint32_t tile_x, tile_y; diff --git a/src/mesa/drivers/dri/i965/intel_resolve_map.c b/src/mesa/drivers/dri/i965/intel_resolve_map.c new file mode 120000 index 0000000..77e50fb --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_resolve_map.c @@ -0,0 +1 @@ +../intel/intel_resolve_map.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 8d7693d..b1a839a 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -267,13 +267,18 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) int x1, y1, x2, y2; uint32_t clear_val; uint32_t BR13, CMD; + struct intel_region *region; int pitch, cpp; drm_intel_bo *aper_array[2]; mask &= ~(1 << buf); irb = intel_get_renderbuffer(fb, buf); - if (irb == NULL || irb->region == NULL || irb->region->bo == NULL) { + if (irb && irb->mt) { + region = irb->mt->region; + assert(region); + assert(region->bo); + } else { fail_mask |= 1 << buf; continue; } @@ -284,12 +289,12 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) x2 = cx + cw + irb->draw_x; y2 = cy + ch + irb->draw_y; - pitch = irb->region->pitch; - cpp = irb->region->cpp; + pitch = region->pitch; + cpp = region->cpp; DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", __FUNCTION__, - irb->region->bo, (pitch * cpp), + region->bo, (pitch * cpp), x1, y1, x2 - x1, y2 - y1); BR13 = 0xf0 << 16; @@ -305,10 +310,10 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) } } - assert(irb->region->tiling != I915_TILING_Y); + assert(region->tiling != I915_TILING_Y); #ifndef I915 - if (irb->region->tiling != I915_TILING_NONE) { + if (region->tiling != I915_TILING_NONE) { CMD |= XY_DST_TILED; pitch /= 4; } @@ -357,7 +362,7 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) /* do space check before going any further */ aper_array[0] = intel->batch.bo; - aper_array[1] = irb->region->bo; + aper_array[1] = region->bo; if (drm_intel_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array)) != 0) { @@ -369,7 +374,7 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) OUT_BATCH(BR13); OUT_BATCH((y1 << 16) | x1); OUT_BATCH((y2 << 16) | x2); - OUT_RELOC_FENCED(irb->region->bo, + OUT_RELOC_FENCED(region->bo, 
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); OUT_BATCH(clear_val); diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index 75d95b1..4a1a816 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -766,10 +766,10 @@ intel_render_object_purgeable(struct gl_context * ctx, (void) option; intel = intel_renderbuffer(obj); - if (intel->region == NULL) + if (intel->mt == NULL) return GL_RELEASED_APPLE; - return intel_buffer_purgeable(intel->region->bo); + return intel_buffer_purgeable(intel->mt->region->bo); } static GLenum @@ -823,10 +823,10 @@ intel_render_object_unpurgeable(struct gl_context * ctx, (void) option; intel = intel_renderbuffer(obj); - if (intel->region == NULL) + if (intel->mt == NULL) return GL_UNDEFINED_APPLE; - return intel_buffer_unpurgeable(intel->region->bo); + return intel_buffer_unpurgeable(intel->mt->region->bo); } #endif diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c index 08f5c4d..4632751 100644 --- a/src/mesa/drivers/dri/intel/intel_buffers.c +++ b/src/mesa/drivers/dri/intel/intel_buffers.c @@ -28,6 +28,7 @@ #include "intel_context.h" #include "intel_buffers.h" #include "intel_fbo.h" +#include "intel_mipmap_tree.h" #include "main/framebuffer.h" #include "main/renderbuffer.h" @@ -40,8 +41,8 @@ intel_drawbuf_region(struct intel_context *intel) { struct intel_renderbuffer *irbColor = intel_renderbuffer(intel->ctx.DrawBuffer->_ColorDrawBuffers[0]); - if (irbColor) - return irbColor->region; + if (irbColor && irbColor->mt) + return irbColor->mt->region; else return NULL; } @@ -54,8 +55,8 @@ intel_readbuf_region(struct intel_context *intel) { struct intel_renderbuffer *irb = intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer); - if (irb) - return irb->region; + if (irb && irb->mt) + return irb->mt->region; else return NULL; } diff --git 
a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index d89b388..9af21c8 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -53,6 +53,7 @@ #include "intel_fbo.h" #include "intel_bufmgr.h" #include "intel_screen.h" +#include "intel_mipmap_tree.h" #include "utils.h" #include "../glsl/ralloc.h" @@ -1153,7 +1154,9 @@ intel_process_dri2_buffer_no_separate_stencil(struct intel_context *intel, if (!rb) return; - if (rb->region && rb->region->name == buffer->name) + if (rb->mt && + rb->mt->region && + rb->mt->region->name == buffer->name) return; if (unlikely(INTEL_DEBUG & DEBUG_DRI)) { @@ -1167,23 +1170,34 @@ intel_process_dri2_buffer_no_separate_stencil(struct intel_context *intel, if (buffer->attachment == __DRI_BUFFER_STENCIL) { struct intel_renderbuffer *depth_rb = intel_get_renderbuffer(fb, BUFFER_DEPTH); - identify_depth_and_stencil = depth_rb && depth_rb->region; + identify_depth_and_stencil = depth_rb && depth_rb->mt; } if (identify_depth_and_stencil) { if (unlikely(INTEL_DEBUG & DEBUG_DRI)) { fprintf(stderr, "(reusing depth buffer as stencil)\n"); } - intel_region_reference(&rb->region, depth_rb->region); + intel_miptree_reference(&rb->mt, depth_rb->mt); } else { - intel_region_release(&rb->region); - rb->region = intel_region_alloc_for_handle(intel->intelScreen, + intel_miptree_release(&rb->mt); + struct intel_region *region = + intel_region_alloc_for_handle(intel->intelScreen, buffer->cpp, drawable->w, drawable->h, buffer->pitch / buffer->cpp, buffer->name, buffer_name); + if (!region) + return; + + rb->mt = intel_miptree_create_for_region(intel, + GL_TEXTURE_2D, + rb->Base.Format, + region); + intel_region_release(&region); + if (!rb->mt) + return; } if (buffer->attachment == __DRI_BUFFER_DEPTH_STENCIL) { @@ -1196,7 +1210,7 @@ intel_process_dri2_buffer_no_separate_stencil(struct intel_context *intel, /* The rb passed in is the BUFFER_DEPTH attachment, and we need * to
associate this region to BUFFER_STENCIL as well. */ - intel_region_reference(&stencil_rb->region, rb->region); + intel_miptree_reference(&stencil_rb->mt, rb->mt); } } @@ -1338,11 +1352,13 @@ intel_process_dri2_buffer_with_separate_stencil(struct intel_context *intel, /* If the renderbuffer's and DRIbuffer's regions match, then continue. */ if ((buffer->attachment != __DRI_BUFFER_HIZ && - rb->region && - rb->region->name == buffer->name) || + rb->mt && + rb->mt->region && + rb->mt->region->name == buffer->name) || (buffer->attachment == __DRI_BUFFER_HIZ && - rb->hiz_region && - rb->hiz_region->name == buffer->name)) { + rb->mt && + rb->mt->hiz_mt && + rb->mt->hiz_mt->region->name == buffer->name)) { return; } @@ -1371,6 +1387,15 @@ intel_process_dri2_buffer_with_separate_stencil(struct intel_context *intel, buffer_height = drawable->h; } + /* Release the buffer storage now in case we have to return early + * due to failure to allocate new storage. + */ + if (buffer->attachment == __DRI_BUFFER_HIZ) { + intel_miptree_release(&rb->mt->hiz_mt); + } else { + intel_miptree_release(&rb->mt); + } + struct intel_region *region = intel_region_alloc_for_handle(intel->intelScreen, buffer->cpp, @@ -1379,14 +1404,22 @@ intel_process_dri2_buffer_with_separate_stencil(struct intel_context *intel, buffer->pitch / buffer->cpp, buffer->name, buffer_name); + if (!region) + return; + struct intel_mipmap_tree *mt = + intel_miptree_create_for_region(intel, + GL_TEXTURE_2D, + rb->Base.Format, + region); + intel_region_release(&region); + + /* Associate buffer with new storage.
*/ if (buffer->attachment == __DRI_BUFFER_HIZ) { - intel_region_reference(&rb->hiz_region, region); + rb->mt->hiz_mt = mt; } else { - intel_region_reference(&rb->region, region); + rb->mt = mt; } - - intel_region_release(&region); } /** @@ -1463,7 +1496,7 @@ intel_verify_dri2_has_hiz(struct intel_context *intel, assert(stencil_rb->Base.Format == MESA_FORMAT_S8); assert(depth_rb && depth_rb->Base.Format == MESA_FORMAT_X8_Z24); - if (stencil_rb->region->tiling == I915_TILING_NONE) { + if (stencil_rb->mt->region->tiling == I915_TILING_NONE) { /* * The stencil buffer is actually W tiled. The region's tiling is * I915_TILING_NONE, however, because the GTT is incapable of W @@ -1544,11 +1577,21 @@ intel_verify_dri2_has_hiz(struct intel_context *intel, / depth_stencil_buffer->cpp, depth_stencil_buffer->name, "dri2 depth / stencil buffer"); - intel_region_reference(&intel_get_renderbuffer(fb, BUFFER_DEPTH)->region, - region); - intel_region_reference(&intel_get_renderbuffer(fb, BUFFER_STENCIL)->region, - region); + if (!region) + return; + + struct intel_mipmap_tree *mt = + intel_miptree_create_for_region(intel, + GL_TEXTURE_2D, + depth_stencil_rb->Base.Format, + region); intel_region_release(&region); + if (!mt) + return; + + intel_miptree_reference(&intel_get_renderbuffer(fb, BUFFER_DEPTH)->mt, mt); + intel_miptree_reference(&intel_get_renderbuffer(fb, BUFFER_STENCIL)->mt, mt); + intel_miptree_release(&mt); } } diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index f2be597..efaf721 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -166,10 +166,15 @@ struct intel_context * - 7.5.3.3 Hierarchical Depth Buffer Resolve * \{ */ - void (*hiz_resolve_depthbuffer)(struct intel_context *intel, - struct intel_region *depth_region); - void (*hiz_resolve_hizbuffer)(struct intel_context *intel, - struct intel_region *depth_region); + void (*resolve_hiz_slice)(struct intel_context
*intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer); + + void (*resolve_depth_slice)(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer); /** \} */ /** diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index a724f1d..dc3e5dd 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -36,6 +36,8 @@ #include "main/renderbuffer.h" #include "main/context.h" #include "main/teximage.h" +#include "main/image.h" + #include "swrast/swrast.h" #include "drivers/common/meta.h" @@ -55,6 +57,25 @@ #define FILE_DEBUG_FLAG DEBUG_FBO +bool +intel_framebuffer_has_hiz(struct gl_framebuffer *fb) +{ + struct intel_renderbuffer *rb = NULL; + if (fb) + rb = intel_get_renderbuffer(fb, BUFFER_DEPTH); + return rb && rb->mt && rb->mt->hiz_mt; +} + +struct intel_region* +intel_get_rb_region(struct gl_framebuffer *fb, GLuint attIndex) +{ + struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, attIndex); + if (irb && irb->mt) + return irb->mt->region; + else + return NULL; +} + /** * Create a new framebuffer object. 
*/ @@ -76,8 +97,7 @@ intel_delete_renderbuffer(struct gl_renderbuffer *rb) ASSERT(irb); - intel_region_release(&irb->region); - intel_region_release(&irb->hiz_region); + intel_miptree_release(&irb->mt); _mesa_reference_renderbuffer(&irb->wrapped_depth, NULL); _mesa_reference_renderbuffer(&irb->wrapped_stencil, NULL); @@ -103,7 +123,12 @@ intel_map_renderbuffer_gtt(struct gl_context *ctx, GLubyte *map; int stride, flip_stride; - assert(irb->region); + assert(irb->mt); + + intel_renderbuffer_resolve_depth(intel, irb); + if (mode & GL_MAP_WRITE_BIT) { + intel_renderbuffer_set_needs_hiz_resolve(irb); + } irb->map_mode = mode; irb->map_x = x; @@ -111,10 +136,10 @@ intel_map_renderbuffer_gtt(struct gl_context *ctx, irb->map_w = w; irb->map_h = h; - stride = irb->region->pitch * irb->region->cpp; + stride = irb->mt->region->pitch * irb->mt->region->cpp; if (rb->Name == 0) { - y = irb->region->height - 1 - y; + y = irb->mt->region->height - 1 - y; flip_stride = -stride; } else { x += irb->draw_x; @@ -122,14 +147,14 @@ intel_map_renderbuffer_gtt(struct gl_context *ctx, flip_stride = stride; } - if (drm_intel_bo_references(intel->batch.bo, irb->region->bo)) { + if (drm_intel_bo_references(intel->batch.bo, irb->mt->region->bo)) { intel_batchbuffer_flush(intel); } - drm_intel_gem_bo_map_gtt(irb->region->bo); + drm_intel_gem_bo_map_gtt(irb->mt->region->bo); - map = irb->region->bo->virtual; - map += x * irb->region->cpp; + map = irb->mt->region->bo->virtual; + map += x * irb->mt->region->cpp; map += (int)y * stride; *out_map = map; @@ -167,10 +192,10 @@ intel_map_renderbuffer_blit(struct gl_context *ctx, int src_x, src_y; int dst_stride; - assert(irb->region); + assert(irb->mt->region); assert(intel->gen >= 6); assert(!(mode & GL_MAP_WRITE_BIT)); - assert(irb->region->tiling == I915_TILING_X); + assert(irb->mt->region->tiling == I915_TILING_X); irb->map_mode = mode; irb->map_x = x; @@ -178,14 +203,14 @@ intel_map_renderbuffer_blit(struct gl_context *ctx, irb->map_w = w; 
irb->map_h = h; - dst_stride = ALIGN(w * irb->region->cpp, 4); + dst_stride = ALIGN(w * irb->mt->region->cpp, 4); if (rb->Name) { src_x = x + irb->draw_x; src_y = y + irb->draw_y; } else { src_x = x; - src_y = irb->region->height - y - h; + src_y = irb->mt->region->height - y - h; } irb->map_bo = drm_intel_bo_alloc(intel->bufmgr, "MapRenderbuffer() temp", @@ -196,10 +221,10 @@ intel_map_renderbuffer_blit(struct gl_context *ctx, */ if (irb->map_bo && intelEmitCopyBlit(intel, - irb->region->cpp, - irb->region->pitch, irb->region->bo, - 0, irb->region->tiling, - dst_stride / irb->region->cpp, irb->map_bo, + irb->mt->region->cpp, + irb->mt->region->pitch, irb->mt->region->bo, + 0, irb->mt->region->tiling, + dst_stride / irb->mt->region->cpp, irb->map_bo, 0, I915_TILING_NONE, src_x, src_y, 0, 0, @@ -258,7 +283,7 @@ intel_map_renderbuffer_s8(struct gl_context *ctx, uint8_t *untiled_s8_map; assert(rb->Format == MESA_FORMAT_S8); - assert(irb->region); + assert(irb->mt); irb->map_mode = mode; irb->map_x = x; @@ -272,12 +297,12 @@ intel_map_renderbuffer_s8(struct gl_context *ctx, irb->map_buffer = malloc(w * h); untiled_s8_map = irb->map_buffer; - tiled_s8_map = intel_region_map(intel, irb->region, mode); + tiled_s8_map = intel_region_map(intel, irb->mt->region, mode); for (uint32_t pix_y = 0; pix_y < h; pix_y++) { for (uint32_t pix_x = 0; pix_x < w; pix_x++) { uint32_t flipped_y = y_flip * (int32_t)(y + pix_y) + y_bias; - ptrdiff_t offset = intel_offset_S8(irb->region->pitch, + ptrdiff_t offset = intel_offset_S8(irb->mt->region->pitch, x + pix_x, flipped_y); untiled_s8_map[pix_y * w + pix_x] = tiled_s8_map[offset]; @@ -321,11 +346,12 @@ intel_map_renderbuffer_separate_s8z24(struct gl_context *ctx, struct intel_context *intel = intel_context(ctx); struct intel_renderbuffer *irb = intel_renderbuffer(rb); - GLbitfield adjusted_mode; - uint8_t *s8z24_map; int32_t s8z24_stride; + struct intel_renderbuffer *s8_irb; + uint8_t *s8_map; + assert(rb->Name != 0); assert(rb->Format == 
MESA_FORMAT_S8_Z24); assert(irb->wrapped_depth != NULL); @@ -337,42 +363,29 @@ intel_map_renderbuffer_separate_s8z24(struct gl_context *ctx, irb->map_w = w; irb->map_h = h; - if (mode & GL_MAP_READ_BIT) { - /* Since the caller may read the stencil bits, we must copy the stencil - * buffer's contents into the depth buffer. This necessitates that the - * depth buffer be mapped in write mode. - */ - adjusted_mode = mode | GL_MAP_WRITE_BIT; - } else { - adjusted_mode = mode; - } - + /* Map with write mode for the gather below. */ intel_map_renderbuffer_gtt(ctx, irb->wrapped_depth, - x, y, w, h, adjusted_mode, + x, y, w, h, mode | GL_MAP_WRITE_BIT, &s8z24_map, &s8z24_stride); - if (mode & GL_MAP_READ_BIT) { - struct intel_renderbuffer *s8_irb; - uint8_t *s8_map; - - s8_irb = intel_renderbuffer(irb->wrapped_stencil); - s8_map = intel_region_map(intel, s8_irb->region, GL_MAP_READ_BIT); - - for (uint32_t pix_y = 0; pix_y < h; ++pix_y) { - for (uint32_t pix_x = 0; pix_x < w; ++pix_x) { - ptrdiff_t s8_offset = intel_offset_S8(s8_irb->region->pitch, - x + pix_x, - y + pix_y); - ptrdiff_t s8z24_offset = pix_y * s8z24_stride - + pix_x * 4 - + 3; - s8z24_map[s8z24_offset] = s8_map[s8_offset]; - } + s8_irb = intel_renderbuffer(irb->wrapped_stencil); + s8_map = intel_region_map(intel, s8_irb->mt->region, GL_MAP_READ_BIT); + + /* Gather the stencil buffer into the depth buffer. 
*/ + for (uint32_t pix_y = 0; pix_y < h; ++pix_y) { + for (uint32_t pix_x = 0; pix_x < w; ++pix_x) { + ptrdiff_t s8_offset = intel_offset_S8(s8_irb->mt->region->pitch, + x + pix_x, + y + pix_y); + ptrdiff_t s8z24_offset = pix_y * s8z24_stride + + pix_x * 4 + + 3; + s8z24_map[s8z24_offset] = s8_map[s8_offset]; } - - intel_region_unmap(intel, s8_irb->region); } + intel_region_unmap(intel, s8_irb->mt->region); + *out_map = s8z24_map; *out_stride = s8z24_stride; } @@ -392,7 +405,7 @@ intel_map_renderbuffer(struct gl_context *ctx, struct intel_renderbuffer *irb = intel_renderbuffer(rb); /* We sometimes get called with this by our intel_span.c usage. */ - if (!irb->region && !irb->wrapped_depth) { + if (!irb->mt && !irb->wrapped_depth) { *out_map = NULL; *out_stride = 0; return; @@ -406,7 +419,7 @@ intel_map_renderbuffer(struct gl_context *ctx, out_map, out_stride); } else if (intel->gen >= 6 && !(mode & GL_MAP_WRITE_BIT) && - irb->region->tiling == I915_TILING_X) { + irb->mt->region->tiling == I915_TILING_X) { intel_map_renderbuffer_blit(ctx, rb, x, y, w, h, mode, out_map, out_stride); } else { @@ -438,7 +451,7 @@ intel_unmap_renderbuffer_s8(struct gl_context *ctx, * the real buffer. */ uint8_t *untiled_s8_map = irb->map_buffer; - uint8_t *tiled_s8_map = irb->region->bo->virtual; + uint8_t *tiled_s8_map = irb->mt->region->bo->virtual; /* Flip the Y axis for the default framebuffer. */ int y_flip = (rb->Name == 0) ? 
-1 : 1; @@ -447,7 +460,7 @@ intel_unmap_renderbuffer_s8(struct gl_context *ctx, for (uint32_t pix_y = 0; pix_y < irb->map_h; pix_y++) { for (uint32_t pix_x = 0; pix_x < irb->map_w; pix_x++) { uint32_t flipped_y = y_flip * (int32_t)(pix_y + irb->map_y) + y_bias; - ptrdiff_t offset = intel_offset_S8(irb->region->pitch, + ptrdiff_t offset = intel_offset_S8(irb->mt->region->pitch, pix_x + irb->map_x, flipped_y); tiled_s8_map[offset] = @@ -456,7 +469,7 @@ intel_unmap_renderbuffer_s8(struct gl_context *ctx, } } - intel_region_unmap(intel, irb->region); + intel_region_unmap(intel, irb->mt->region); free(irb->map_buffer); irb->map_buffer = NULL; } @@ -494,16 +507,16 @@ intel_unmap_renderbuffer_separate_s8z24(struct gl_context *ctx, uint8_t *s8_map; s8_irb = intel_renderbuffer(irb->wrapped_stencil); - s8_map = intel_region_map(intel, s8_irb->region, GL_MAP_WRITE_BIT); + s8_map = intel_region_map(intel, s8_irb->mt->region, GL_MAP_WRITE_BIT); - int32_t s8z24_stride = 4 * s8z24_irb->region->pitch; - uint8_t *s8z24_map = s8z24_irb->region->bo->virtual + int32_t s8z24_stride = 4 * s8z24_irb->mt->region->pitch; + uint8_t *s8z24_map = s8z24_irb->mt->region->bo->virtual + map_y * s8z24_stride + map_x * 4; for (uint32_t pix_y = 0; pix_y < map_h; ++pix_y) { for (uint32_t pix_x = 0; pix_x < map_w; ++pix_x) { - ptrdiff_t s8_offset = intel_offset_S8(s8_irb->region->pitch, + ptrdiff_t s8_offset = intel_offset_S8(s8_irb->mt->region->pitch, map_x + pix_x, map_y + pix_y); ptrdiff_t s8z24_offset = pix_y * s8z24_stride @@ -513,10 +526,10 @@ intel_unmap_renderbuffer_separate_s8z24(struct gl_context *ctx, } } - intel_region_unmap(intel, s8_irb->region); + intel_region_unmap(intel, s8_irb->mt->region); } - drm_intel_gem_bo_unmap_gtt(s8z24_irb->region->bo); + drm_intel_gem_bo_unmap_gtt(s8z24_irb->mt->region->bo); } /** @@ -542,11 +555,11 @@ intel_unmap_renderbuffer(struct gl_context *ctx, irb->map_bo = 0; } else { /* Paired with intel_map_renderbuffer_gtt(). 
*/ - if (irb->region) { - /* The region may be null when intel_map_renderbuffer() is + if (irb->mt) { + /* The miptree may be null when intel_map_renderbuffer() is * called from intel_span.c. */ - drm_intel_gem_bo_unmap_gtt(irb->region->bo); + drm_intel_gem_bo_unmap_gtt(irb->mt->region->bo); } } } @@ -613,13 +626,7 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer intel_flush(ctx); - /* free old region */ - if (irb->region) { - intel_region_release(&irb->region); - } - if (irb->hiz_region) { - intel_region_release(&irb->hiz_region); - } + intel_miptree_release(&irb->mt); DBG("%s: %s: %s (%dx%d)\n", __FUNCTION__, _mesa_lookup_enum_by_nr(internalFormat), @@ -651,17 +658,18 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer * * If we neglect to double the pitch, then render corruption occurs. */ - irb->region = intel_region_alloc(intel->intelScreen, - I915_TILING_NONE, - cpp * 2, - ALIGN(width, 64), - ALIGN((height + 1) / 2, 64), - true); - if (!irb->region) - return false; + irb->mt = intel_miptree_create_for_renderbuffer( + intel, + rb->Format, + I915_TILING_NONE, + cpp * 2, + ALIGN(width, 64), + ALIGN((height + 1) / 2, 64)); + if (!irb->mt) + return false; } else if (irb->Base.Format == MESA_FORMAT_S8_Z24 - && intel->must_use_separate_stencil) { + && intel->has_separate_stencil) { bool ok = true; struct gl_renderbuffer *depth_rb; @@ -695,20 +703,16 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer _mesa_reference_renderbuffer(&irb->wrapped_stencil, stencil_rb); } else { - irb->region = intel_region_alloc(intel->intelScreen, tiling, cpp, - width, height, true); - if (!irb->region) + irb->mt = intel_miptree_create_for_renderbuffer(intel, rb->Format, + tiling, cpp, + width, height); + if (!irb->mt) return false; if (intel->vtbl.is_hiz_depth_format(intel, rb->Format)) { - irb->hiz_region = intel_region_alloc(intel->intelScreen, - I915_TILING_Y, - irb->region->cpp, - 
irb->region->width, - irb->region->height, - true); - if (!irb->hiz_region) { - intel_region_release(&irb->region); + bool ok = intel_miptree_alloc_hiz(intel, irb->mt); + if (!ok) { + intel_miptree_release(&irb->mt); return false; } } @@ -747,7 +751,13 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx, } irb = intel_renderbuffer(rb); - intel_region_reference(&irb->region, image->region); + intel_miptree_release(&irb->mt); + irb->mt = intel_miptree_create_for_region(intel, + GL_TEXTURE_2D, + image->format, + image->region); + if (!irb->mt) + return; rb->InternalFormat = image->internal_format; rb->Width = image->region->width; @@ -938,117 +948,141 @@ intel_framebuffer_renderbuffer(struct gl_context * ctx, intel_draw_buffer(ctx); } -static bool -intel_update_tex_wrapper_regions(struct intel_context *intel, - struct intel_renderbuffer *irb, - struct intel_texture_image *intel_image); +static struct intel_renderbuffer* +intel_renderbuffer_wrap_miptree(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer, + gl_format format, + GLenum internal_format); +/** + * \par Special case for separate stencil + * + * When wrapping a depthstencil texture that uses separate stencil, this + * function is recursively called twice: once to create \c + * irb->wrapped_depth and again to create \c irb->wrapped_stencil. On the + * call to create \c irb->wrapped_depth, the \c format and \c + * internal_format parameters do not match \c mt->format. In that case, \c + * mt->format is MESA_FORMAT_S8_Z24 and \c format is \c + * MESA_FORMAT_X8_Z24. 
+ * + * @return true on success + */ static bool -intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb, - struct gl_texture_image *texImage) +intel_renderbuffer_update_wrapper(struct intel_context *intel, + struct intel_renderbuffer *irb, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer, + gl_format format, + GLenum internal_format) { - struct intel_context *intel = intel_context(ctx); - struct intel_texture_image *intel_image = intel_texture_image(texImage); - int width, height, depth; + struct gl_renderbuffer *rb = &irb->Base; - if (!intel_span_supports_format(texImage->TexFormat)) { + rb->Format = format; + if (!intel_span_supports_format(rb->Format)) { DBG("Render to texture BAD FORMAT %s\n", - _mesa_get_format_name(texImage->TexFormat)); + _mesa_get_format_name(rb->Format)); return false; } else { - DBG("Render to texture %s\n", _mesa_get_format_name(texImage->TexFormat)); + DBG("Render to texture %s\n", _mesa_get_format_name(rb->Format)); } - intel_miptree_get_dimensions_for_image(texImage, &width, &height, &depth); - - irb->Base.Format = texImage->TexFormat; - irb->Base.DataType = intel_mesa_format_to_rb_datatype(texImage->TexFormat); - irb->Base.InternalFormat = texImage->InternalFormat; - irb->Base._BaseFormat = _mesa_base_tex_format(ctx, irb->Base.InternalFormat); - irb->Base.Width = width; - irb->Base.Height = height; + rb->InternalFormat = internal_format; + rb->DataType = intel_mesa_format_to_rb_datatype(rb->Format); + rb->_BaseFormat = _mesa_get_format_base_format(rb->Format); + rb->Width = mt->level[level].width; + rb->Height = mt->level[level].height; irb->Base.Delete = intel_delete_renderbuffer; irb->Base.AllocStorage = intel_nop_alloc_storage; - if (intel_image->stencil_rb) { - /* The tex image has packed depth/stencil format, but is using separate - * stencil. */ - - bool ok; - struct intel_renderbuffer *depth_irb = - intel_renderbuffer(intel_image->depth_rb); - - /* Update the hiz region if necessary. 
*/ - ok = intel_update_tex_wrapper_regions(intel, depth_irb, intel_image); - if (!ok) { - return false; + intel_miptree_check_level_layer(mt, level, layer); + irb->mt_level = level; + irb->mt_layer = layer; + + if (mt->stencil_mt && _mesa_is_depthstencil_format(rb->InternalFormat)) { + assert((irb->wrapped_depth == NULL) == (irb->wrapped_stencil == NULL)); + + struct intel_renderbuffer *depth_irb; + struct intel_renderbuffer *stencil_irb; + + if (!irb->wrapped_depth) { + depth_irb = intel_renderbuffer_wrap_miptree(intel, + mt, level, layer, + MESA_FORMAT_X8_Z24, + GL_DEPTH_COMPONENT24); + stencil_irb = intel_renderbuffer_wrap_miptree(intel, + mt->stencil_mt, + level, layer, + MESA_FORMAT_S8, + GL_STENCIL_INDEX8); + _mesa_reference_renderbuffer(&irb->wrapped_depth, &depth_irb->Base); + _mesa_reference_renderbuffer(&irb->wrapped_stencil, &stencil_irb->Base); + + if (!irb->wrapped_depth || !irb->wrapped_stencil) + return false; + } else { + bool ok = true; + + depth_irb = intel_renderbuffer(irb->wrapped_depth); + stencil_irb = intel_renderbuffer(irb->wrapped_stencil); + + ok &= intel_renderbuffer_update_wrapper(intel, + depth_irb, + mt, + level, layer, + MESA_FORMAT_X8_Z24, + GL_DEPTH_COMPONENT24); + ok &= intel_renderbuffer_update_wrapper(intel, + stencil_irb, + mt->stencil_mt, + level, layer, + MESA_FORMAT_S8, + GL_STENCIL_INDEX8); + if (!ok) + return false; } - - /* The tex image shares its embedded depth and stencil renderbuffers with - * the renderbuffer wrapper. */ - _mesa_reference_renderbuffer(&irb->wrapped_depth, - intel_image->depth_rb); - _mesa_reference_renderbuffer(&irb->wrapped_stencil, - intel_image->stencil_rb); - - return true; } else { - return intel_update_tex_wrapper_regions(intel, irb, intel_image); - } -} - -/** - * FIXME: The handling of the hiz region is broken for mipmapped depth textures - * FIXME: because intel_finalize_mipmap_tree is unaware of it. 
- */ -static bool -intel_update_tex_wrapper_regions(struct intel_context *intel, - struct intel_renderbuffer *irb, - struct intel_texture_image *intel_image) -{ - struct gl_renderbuffer *rb = &irb->Base; - - /* Point the renderbuffer's region to the texture's region. */ - if (irb->region != intel_image->mt->region) { - intel_region_reference(&irb->region, intel_image->mt->region); - } - - /* Allocate the texture's hiz region if necessary. */ - if (intel->vtbl.is_hiz_depth_format(intel, rb->Format) - && !intel_image->mt->hiz_region) { - intel_image->mt->hiz_region = - intel_region_alloc(intel->intelScreen, - I915_TILING_Y, - _mesa_get_format_bytes(rb->Format), - rb->Width, - rb->Height, - true); - if (!intel_image->mt->hiz_region) - return false; - } - - /* Point the renderbuffer's hiz region to the texture's hiz region. */ - if (irb->hiz_region != intel_image->mt->hiz_region) { - intel_region_reference(&irb->hiz_region, intel_image->mt->hiz_region); + intel_miptree_reference(&irb->mt, mt); + intel_renderbuffer_set_draw_offset(irb); + + if (mt->hiz_mt == NULL && + intel->vtbl.is_hiz_depth_format(intel, rb->Format)) { + intel_miptree_alloc_hiz(intel, mt); + if (!mt->hiz_mt) + return false; + } } return true; } - /** - * When glFramebufferTexture[123]D is called this function sets up the - * gl_renderbuffer wrapper around the texture image. - * This will have the region info needed for hardware rendering. + * \brief Wrap a renderbuffer around a single slice of a miptree. + * + * Called by glFramebufferTexture*(). This just allocates a + * ``struct intel_renderbuffer`` then calls + * intel_renderbuffer_update_wrapper() to do the real work. 
+ * + * \see intel_renderbuffer_update_wrapper() */ -static struct intel_renderbuffer * -intel_wrap_texture(struct gl_context * ctx, struct gl_texture_image *texImage) +static struct intel_renderbuffer* +intel_renderbuffer_wrap_miptree(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer, + gl_format format, + GLenum internal_format) + { const GLuint name = ~0; /* not significant, but distinct for debugging */ + struct gl_context *ctx = &intel->ctx; struct intel_renderbuffer *irb; - /* make an intel_renderbuffer to wrap the texture image */ + intel_miptree_check_level_layer(mt, level, layer); + irb = CALLOC_STRUCT(intel_renderbuffer); if (!irb) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glFramebufferTexture"); @@ -1058,7 +1092,9 @@ intel_wrap_texture(struct gl_context * ctx, struct gl_texture_image *texImage) _mesa_init_renderbuffer(&irb->Base, name); irb->Base.ClassID = INTEL_RB_CLASS; - if (!intel_update_wrapper(ctx, irb, texImage)) { + if (!intel_renderbuffer_update_wrapper(intel, irb, + mt, level, layer, + format, internal_format)) { free(irb); return NULL; } @@ -1067,17 +1103,15 @@ intel_wrap_texture(struct gl_context * ctx, struct gl_texture_image *texImage) } void -intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb, - struct intel_texture_image *intel_image, - int zoffset) +intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb) { unsigned int dst_x, dst_y; /* compute offset of the particular 2D image within the texture region */ - intel_miptree_get_image_offset(intel_image->mt, - intel_image->base.Base.Level, - intel_image->base.Base.Face, - zoffset, + intel_miptree_get_image_offset(irb->mt, + irb->mt_level, + 0, /* face, which we ignore */ + irb->mt_layer, &dst_x, &dst_y); irb->draw_x = dst_x; @@ -1099,20 +1133,21 @@ intel_renderbuffer_tile_offsets(struct intel_renderbuffer *irb, uint32_t *tile_x, uint32_t *tile_y) { - int cpp = irb->region->cpp; - uint32_t pitch = irb->region->pitch * cpp; + 
struct intel_region *region = irb->mt->region; + int cpp = region->cpp; + uint32_t pitch = region->pitch * cpp; - if (irb->region->tiling == I915_TILING_NONE) { + if (region->tiling == I915_TILING_NONE) { *tile_x = 0; *tile_y = 0; return irb->draw_x * cpp + irb->draw_y * pitch; - } else if (irb->region->tiling == I915_TILING_X) { + } else if (region->tiling == I915_TILING_X) { *tile_x = irb->draw_x % (512 / cpp); *tile_y = irb->draw_y % 8; return ((irb->draw_y / 8) * (8 * pitch) + (irb->draw_x - *tile_x) / (512 / cpp) * 4096); } else { - assert(irb->region->tiling == I915_TILING_Y); + assert(region->tiling == I915_TILING_Y); *tile_x = irb->draw_x % (128 / cpp); *tile_y = irb->draw_y % 32; return ((irb->draw_y / 32) * (32 * pitch) + @@ -1147,12 +1182,22 @@ intel_render_texture(struct gl_context * ctx, struct gl_framebuffer *fb, struct gl_renderbuffer_attachment *att) { + struct intel_context *intel = intel_context(ctx); struct gl_texture_image *image = _mesa_get_attachment_teximage(att); struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer); struct intel_texture_image *intel_image = intel_texture_image(image); + struct intel_mipmap_tree *mt = intel_image->mt; (void) fb; + int layer; + if (att->CubeMapFace > 0) { + assert(att->Zoffset == 0); + layer = att->CubeMapFace; + } else { + layer = att->Zoffset; + } + if (!intel_image->mt) { /* Fallback on drawing to a texture that doesn't have a miptree * (has a border, width/height 0, etc.) 
@@ -1162,7 +1207,13 @@ intel_render_texture(struct gl_context * ctx, return; } else if (!irb) { - irb = intel_wrap_texture(ctx, image); + irb = intel_renderbuffer_wrap_miptree(intel, + mt, + att->TextureLevel, + layer, + image->TexFormat, + image->InternalFormat); + if (irb) { /* bind the wrapper to the attachment point */ _mesa_reference_renderbuffer(&att->Renderbuffer, &irb->Base); @@ -1174,7 +1225,10 @@ intel_render_texture(struct gl_context * ctx, } } - if (!intel_update_wrapper(ctx, irb, image)) { + if (!intel_renderbuffer_update_wrapper(intel, irb, + mt, att->TextureLevel, layer, + image->TexFormat, + image->InternalFormat)) { _mesa_reference_renderbuffer(&att->Renderbuffer, NULL); _swrast_render_texture(ctx, fb, att); return; @@ -1185,7 +1239,6 @@ intel_render_texture(struct gl_context * ctx, att->Texture->Name, image->Width, image->Height, irb->Base.RefCount); - intel_renderbuffer_set_draw_offset(irb, intel_image, att->Zoffset); intel_image->used_as_render_target = true; #ifndef I915 @@ -1211,9 +1264,9 @@ intel_render_texture(struct gl_context * ctx, true); intel_miptree_copy_teximage(intel, intel_image, new_mt); - intel_renderbuffer_set_draw_offset(irb, intel_image, att->Zoffset); + intel_renderbuffer_set_draw_offset(irb); - intel_region_reference(&irb->region, intel_image->mt->region); + intel_miptree_reference(&irb->mt, intel_image->mt); intel_miptree_release(&new_mt); } #endif @@ -1395,6 +1448,69 @@ intel_blit_framebuffer(struct gl_context *ctx, mask, filter); } +void +intel_renderbuffer_set_needs_hiz_resolve(struct intel_renderbuffer *irb) +{ + if (irb->mt) { + intel_miptree_slice_set_needs_hiz_resolve(irb->mt, + irb->mt_level, + irb->mt_layer); + } else if (irb->wrapped_depth) { + intel_renderbuffer_set_needs_hiz_resolve( + intel_renderbuffer(irb->wrapped_depth)); + } else { + return; + } +} + +void +intel_renderbuffer_set_needs_depth_resolve(struct intel_renderbuffer *irb) +{ + if (irb->mt) { + intel_miptree_slice_set_needs_depth_resolve(irb->mt, + 
irb->mt_level, + irb->mt_layer); + } else if (irb->wrapped_depth) { + intel_renderbuffer_set_needs_depth_resolve( + intel_renderbuffer(irb->wrapped_depth)); + } else { + return; + } +} + +bool +intel_renderbuffer_resolve_hiz(struct intel_context *intel, + struct intel_renderbuffer *irb) +{ + if (irb->mt) + return intel_miptree_slice_resolve_hiz(intel, + irb->mt, + irb->mt_level, + irb->mt_layer); + if (irb->wrapped_depth) + return intel_renderbuffer_resolve_hiz(intel, + intel_renderbuffer(irb->wrapped_depth)); + + return false; +} + +bool +intel_renderbuffer_resolve_depth(struct intel_context *intel, + struct intel_renderbuffer *irb) +{ + if (irb->mt) + return intel_miptree_slice_resolve_depth(intel, + irb->mt, + irb->mt_level, + irb->mt_layer); + + if (irb->wrapped_depth) + return intel_renderbuffer_resolve_depth(intel, + intel_renderbuffer(irb->wrapped_depth)); + + return false; +} + /** * Do one-time context initializations related to GL_EXT_framebuffer_object. * Hook in device driver functions. diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index 4cc57fe..bb94309 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -35,6 +35,7 @@ #include "intel_screen.h" struct intel_context; +struct intel_mipmap_tree; struct intel_texture_image; /** @@ -43,14 +44,29 @@ struct intel_texture_image; struct intel_renderbuffer { struct gl_renderbuffer Base; - struct intel_region *region; + struct intel_mipmap_tree *mt; /**< The renderbuffer storage. */ drm_intel_bo *map_bo; + void *map_buffer; GLuint map_x, map_y, map_w, map_h; GLbitfield map_mode; - /** Only used by depth renderbuffers for which HiZ is enabled. */ - struct intel_region *hiz_region; + /** + * \name Miptree view + * \{ + * + * Multiple renderbuffers may simultaneously wrap a single texture and each + * provide a different view into that texture. 
The fields below indicate + * which miptree slice is wrapped by this renderbuffer. The fields' values + * are consistent with the 'level' and 'layer' parameters of + * glFramebufferTextureLayer(). + * + * For renderbuffers not created with glFramebufferTexture*(), mt_level and + * mt_layer are 0. + */ + unsigned int mt_level; + unsigned int mt_layer; + /** \} */ /** * \name Packed depth/stencil unwrappers @@ -134,28 +150,8 @@ intel_get_renderbuffer(struct gl_framebuffer *fb, gl_buffer_index attIndex) return irb; } -/** - * If the framebuffer has a depth buffer attached, then return its HiZ region. - * The HiZ region may be null. - */ -static INLINE struct intel_region* -intel_framebuffer_get_hiz_region(struct gl_framebuffer *fb) -{ - struct intel_renderbuffer *rb = NULL; - if (fb) - rb = intel_get_renderbuffer(fb, BUFFER_DEPTH); - - if (rb) - return rb->hiz_region; - else - return NULL; -} - -static INLINE bool -intel_framebuffer_has_hiz(struct gl_framebuffer *fb) -{ - return intel_framebuffer_get_hiz_region(fb) != NULL; -} +bool +intel_framebuffer_has_hiz(struct gl_framebuffer *fb); extern struct intel_renderbuffer * intel_create_renderbuffer(gl_format format); @@ -179,23 +175,45 @@ extern void intel_flip_renderbuffers(struct gl_framebuffer *fb); void -intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb, - struct intel_texture_image *intel_image, - int zoffset); +intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb); uint32_t intel_renderbuffer_tile_offsets(struct intel_renderbuffer *irb, uint32_t *tile_x, uint32_t *tile_y); -static INLINE struct intel_region * -intel_get_rb_region(struct gl_framebuffer *fb, GLuint attIndex) -{ - struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, attIndex); - if (irb) - return irb->region; - else - return NULL; -} +struct intel_region* +intel_get_rb_region(struct gl_framebuffer *fb, GLuint attIndex); + +void +intel_renderbuffer_set_needs_hiz_resolve(struct intel_renderbuffer *irb); + +void 
+intel_renderbuffer_set_needs_depth_resolve(struct intel_renderbuffer *irb); + + +/** + * \brief Perform a HiZ resolve on the renderbuffer. + * + * It is safe to call this function on a renderbuffer without HiZ. In that + * case, the function is a no-op. + * + * \return false if no resolve was needed + */ +bool +intel_renderbuffer_resolve_hiz(struct intel_context *intel, + struct intel_renderbuffer *irb); + +/** + * \brief Perform a depth resolve on the renderbuffer. + * + * It is safe to call this function on a renderbuffer without HiZ. In that + * case, the function is a no-op. + * + * \return false if no resolve was needed + */ +bool +intel_renderbuffer_resolve_depth(struct intel_context *intel, + struct intel_renderbuffer *irb); #endif /* INTEL_FBO_H */ diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index 19f151f..f8ef262 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -25,19 +25,23 @@ * **************************************************************************/ +#include "intel_batchbuffer.h" #include "intel_context.h" #include "intel_mipmap_tree.h" #include "intel_regions.h" +#include "intel_resolve_map.h" +#include "intel_span.h" #include "intel_tex_layout.h" #include "intel_tex.h" #include "intel_blit.h" + #include "main/enums.h" #include "main/formats.h" +#include "main/image.h" #include "main/teximage.h" #define FILE_DEBUG_FLAG DEBUG_MIPTREE - static GLenum target_to_target(GLenum target) { @@ -54,7 +58,6 @@ target_to_target(GLenum target) } } - static struct intel_mipmap_tree * intel_miptree_create_internal(struct intel_context *intel, GLenum target, @@ -82,11 +85,30 @@ intel_miptree_create_internal(struct intel_context *intel, mt->last_level = last_level; mt->width0 = width0; mt->height0 = height0; - mt->depth0 = depth0; mt->cpp = compress_byte ? 
compress_byte : _mesa_get_format_bytes(mt->format); mt->compressed = compress_byte ? 1 : 0; mt->refcount = 1; + intel_get_texture_alignment_unit(intel, format, + &mt->align_w, &mt->align_h); + + if (target == GL_TEXTURE_CUBE_MAP) { + assert(depth0 == 1); + mt->depth0 = 6; + } else { + mt->depth0 = depth0; + } + + if (format == MESA_FORMAT_S8) { + /* The stencil buffer has quirky pitch requirements. From Vol 2a, + * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch": + * The pitch must be set to 2x the value computed based on width, as + * the stencil buffer is stored with two rows interleaved. + */ + assert(intel->has_separate_stencil); + mt->cpp = 2; + } + #ifdef I915 (void) intel; if (intel->is_945) @@ -97,6 +119,23 @@ intel_miptree_create_internal(struct intel_context *intel, brw_miptree_layout(intel, mt); #endif + if (intel->has_separate_stencil && + _mesa_is_depthstencil_format(_mesa_get_format_base_format(format))) { + mt->stencil_mt = intel_miptree_create(intel, + mt->target, + MESA_FORMAT_S8, + mt->first_level, + mt->last_level, + mt->width0, + mt->height0, + mt->depth0, + true); + if (!mt->stencil_mt) { + intel_miptree_release(&mt); + return NULL; + } + } + return mt; } @@ -121,6 +160,8 @@ intel_miptree_create(struct intel_context *intel, (base_format == GL_DEPTH_COMPONENT || base_format == GL_DEPTH_STENCIL_EXT)) tiling = I915_TILING_Y; + else if (format == MESA_FORMAT_S8) + tiling = I915_TILING_NONE; else if (width0 >= 64) tiling = I915_TILING_X; } @@ -171,6 +212,27 @@ intel_miptree_create_for_region(struct intel_context *intel, return mt; } +struct intel_mipmap_tree* +intel_miptree_create_for_renderbuffer(struct intel_context *intel, + gl_format format, + uint32_t tiling, + uint32_t cpp, + uint32_t width, + uint32_t height) +{ + struct intel_region *region; + struct intel_mipmap_tree *mt; + + region = intel_region_alloc(intel->intelScreen, + tiling, cpp, width, height, true); + if (!region) + return NULL; + + mt = 
intel_miptree_create_for_region(intel, GL_TEXTURE_2D, format, region); + intel_region_release(&region); + return mt; +} + void intel_miptree_reference(struct intel_mipmap_tree **dst, struct intel_mipmap_tree *src) @@ -202,11 +264,12 @@ intel_miptree_release(struct intel_mipmap_tree **mt) DBG("%s deleting %p\n", __FUNCTION__, *mt); intel_region_release(&((*mt)->region)); - intel_region_release(&((*mt)->hiz_region)); + intel_miptree_release(&(*mt)->stencil_mt); + intel_miptree_release(&(*mt)->hiz_mt); + intel_resolve_map_clear(&(*mt)->hiz_map); for (i = 0; i < MAX_TEXTURE_LEVELS; i++) { - free((*mt)->level[i].x_offset); - free((*mt)->level[i].y_offset); + free((*mt)->level[i].slice); } free(*mt); @@ -267,7 +330,6 @@ intel_miptree_match_image(struct intel_mipmap_tree *mt, void intel_miptree_set_level_info(struct intel_mipmap_tree *mt, GLuint level, - GLuint nr_images, GLuint x, GLuint y, GLuint w, GLuint h, GLuint d) { @@ -276,18 +338,15 @@ intel_miptree_set_level_info(struct intel_mipmap_tree *mt, mt->level[level].depth = d; mt->level[level].level_x = x; mt->level[level].level_y = y; - mt->level[level].nr_images = nr_images; DBG("%s level %d size: %d,%d,%d offset %d,%d\n", __FUNCTION__, level, w, h, d, x, y); - assert(nr_images); - assert(!mt->level[level].x_offset); + assert(mt->level[level].slice == NULL); - mt->level[level].x_offset = malloc(nr_images * sizeof(GLuint)); - mt->level[level].x_offset[0] = mt->level[level].level_x; - mt->level[level].y_offset = malloc(nr_images * sizeof(GLuint)); - mt->level[level].y_offset[0] = mt->level[level].level_y; + mt->level[level].slice = malloc(d * sizeof(*mt->level[0].slice)); + mt->level[level].slice[0].x_offset = mt->level[level].level_x; + mt->level[level].slice[0].y_offset = mt->level[level].level_y; } @@ -299,38 +358,110 @@ intel_miptree_set_image_offset(struct intel_mipmap_tree *mt, if (img == 0 && level == 0) assert(x == 0 && y == 0); - assert(img < mt->level[level].nr_images); + assert(img < mt->level[level].depth); -
mt->level[level].x_offset[img] = mt->level[level].level_x + x; - mt->level[level].y_offset[img] = mt->level[level].level_y + y; + mt->level[level].slice[img].x_offset = mt->level[level].level_x + x; + mt->level[level].slice[img].y_offset = mt->level[level].level_y + y; DBG("%s level %d img %d pos %d,%d\n", __FUNCTION__, level, img, - mt->level[level].x_offset[img], mt->level[level].y_offset[img]); + mt->level[level].slice[img].x_offset, + mt->level[level].slice[img].y_offset); } +/** + * For cube map textures, either the \c face parameter can be used, of course, + * or the cube face can be interpreted as a depth layer and the \c layer + * parameter used. + */ void intel_miptree_get_image_offset(struct intel_mipmap_tree *mt, - GLuint level, GLuint face, GLuint depth, + GLuint level, GLuint face, GLuint layer, GLuint *x, GLuint *y) { - switch (mt->target) { - case GL_TEXTURE_CUBE_MAP_ARB: - *x = mt->level[level].x_offset[face]; - *y = mt->level[level].y_offset[face]; - break; - case GL_TEXTURE_3D: - case GL_TEXTURE_2D_ARRAY_EXT: - case GL_TEXTURE_1D_ARRAY_EXT: - assert(depth < mt->level[level].nr_images); - *x = mt->level[level].x_offset[depth]; - *y = mt->level[level].y_offset[depth]; - break; - default: - *x = mt->level[level].x_offset[0]; - *y = mt->level[level].y_offset[0]; - break; + int slice; + + if (face > 0) { + assert(mt->target == GL_TEXTURE_CUBE_MAP); + assert(face < 6); + assert(layer == 0); + slice = face; + } else { + /* This branch may be taken even if the texture target is a cube map. In + * that case, the caller chose to interpret each cube face as a layer. 
+ */ + assert(face == 0); + slice = layer; + } + + *x = mt->level[level].slice[slice].x_offset; + *y = mt->level[level].slice[slice].y_offset; +} + +static void +intel_miptree_copy_slice(struct intel_context *intel, + struct intel_mipmap_tree *dst_mt, + struct intel_mipmap_tree *src_mt, + int level, + int face, + int depth) + +{ + gl_format format = src_mt->format; + uint32_t width = src_mt->level[level].width; + uint32_t height = src_mt->level[level].height; + + assert(depth < src_mt->level[level].depth); + + if (dst_mt->compressed) { + height = ALIGN(height, dst_mt->align_h) / dst_mt->align_h; + width = ALIGN(width, dst_mt->align_w); + } + + uint32_t dst_x, dst_y, src_x, src_y; + intel_miptree_get_image_offset(dst_mt, level, face, depth, + &dst_x, &dst_y); + intel_miptree_get_image_offset(src_mt, level, face, depth, + &src_x, &src_y); + + DBG("validate blit mt %p %d,%d/%d -> mt %p %d,%d/%d (%dx%d)\n", + src_mt, src_x, src_y, src_mt->region->pitch * src_mt->region->cpp, + dst_mt, dst_x, dst_y, dst_mt->region->pitch * dst_mt->region->cpp, + width, height); + + if (!intelEmitCopyBlit(intel, + dst_mt->region->cpp, + src_mt->region->pitch, src_mt->region->bo, + 0, src_mt->region->tiling, + dst_mt->region->pitch, dst_mt->region->bo, + 0, dst_mt->region->tiling, + src_x, src_y, + dst_x, dst_y, + width, height, + GL_COPY)) { + + fallback_debug("miptree validate blit for %s failed\n", + _mesa_get_format_name(format)); + void *dst = intel_region_map(intel, dst_mt->region, GL_MAP_WRITE_BIT); + void *src = intel_region_map(intel, src_mt->region, GL_MAP_READ_BIT); + + _mesa_copy_rect(dst, + dst_mt->cpp, + dst_mt->region->pitch, + dst_x, dst_y, + width, height, + src, src_mt->region->pitch, + src_x, src_y); + + intel_region_unmap(intel, dst_mt->region); + intel_region_unmap(intel, src_mt->region); + } + + if (src_mt->stencil_mt) { + intel_miptree_copy_slice(intel, + dst_mt->stencil_mt, src_mt->stencil_mt, + level, face, depth); } } @@ -346,65 +477,256 @@ 
intel_miptree_copy_teximage(struct intel_context *intel, struct intel_mipmap_tree *src_mt = intelImage->mt; int level = intelImage->base.Base.Level; int face = intelImage->base.Base.Face; - GLuint width = src_mt->level[level].width; - GLuint height = src_mt->level[level].height; - GLuint depth = src_mt->level[level].depth; - int slice; - void *src, *dst; + GLuint depth = intelImage->base.Base.Depth; - if (dst_mt->compressed) { - unsigned int align_w, align_h; + for (int slice = 0; slice < depth; slice++) { + intel_miptree_copy_slice(intel, dst_mt, src_mt, level, face, slice); + } - intel_get_texture_alignment_unit(intelImage->base.Base.TexFormat, - &align_w, &align_h); - height = ALIGN(height, align_h) / align_h; - width = ALIGN(width, align_w); + intel_miptree_reference(&intelImage->mt, dst_mt); +} + +/** + * \param scatter Scatter if true. Gather if false. + * + * \see intel_miptree_s8z24_scatter() + * \see intel_miptree_s8z24_gather() + */ +static void +intel_miptree_s8z24_scattergather(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer, + bool scatter) +{ + /* Check function inputs. */ + assert(level >= mt->first_level); + assert(level <= mt->last_level); + assert(layer < mt->level[level].depth); + + /* Label everything and its bit layout, just to make the code easier to + * read. + */ + struct intel_mipmap_tree *s8_mt = mt->stencil_mt; + struct intel_mipmap_level *s8_level = &s8_mt->level[level]; + struct intel_mipmap_slice *s8_slice = &s8_mt->level[level].slice[layer]; + + struct intel_mipmap_tree *s8z24_mt = mt; + struct intel_mipmap_level *s8z24_level = &s8z24_mt->level[level]; + struct intel_mipmap_slice *s8z24_slice = &s8z24_mt->level[level].slice[layer]; + + /* Check that both miptree levels have the same dimensions. */ + assert(s8_level->width == s8z24_level->width); + assert(s8_level->height == s8z24_level->height); + assert(s8_level->depth == s8z24_level->depth); + + /* Map the buffers. 
*/ + if (drm_intel_bo_references(intel->batch.bo, s8_mt->region->bo) || + drm_intel_bo_references(intel->batch.bo, s8z24_mt->region->bo)) { + intel_batchbuffer_flush(intel); } + drm_intel_gem_bo_map_gtt(s8_mt->region->bo); + drm_intel_gem_bo_map_gtt(s8z24_mt->region->bo); - for (slice = 0; slice < depth; slice++) { - unsigned int dst_x, dst_y, src_x, src_y; + /* Define the invariant values outside the for loop, because I don't trust + * GCC to do it for us. + */ + uint8_t *s8_map = s8_mt->region->bo->virtual + + s8_slice->x_offset + + s8_slice->y_offset; + + uint8_t *s8z24_map = s8z24_mt->region->bo->virtual + + s8z24_slice->x_offset + + s8z24_slice->y_offset; + + ptrdiff_t s8z24_stride = s8z24_mt->region->pitch * s8z24_mt->region->cpp; + + uint32_t w = s8_level->width; + uint32_t h = s8_level->height; + + for (uint32_t y = 0; y < h; ++y) { + for (uint32_t x = 0; x < w; ++x) { + ptrdiff_t s8_offset = intel_offset_S8(s8_mt->region->pitch, x, y); + ptrdiff_t s8z24_offset = y * s8z24_stride + + x * 4 + + 3; + if (scatter) { + s8_map[s8_offset] = s8z24_map[s8z24_offset]; + } else { + s8z24_map[s8z24_offset] = s8_map[s8_offset]; + } + } + } - intel_miptree_get_image_offset(dst_mt, level, face, slice, - &dst_x, &dst_y); + drm_intel_gem_bo_unmap_gtt(s8_mt->region->bo); + drm_intel_gem_bo_unmap_gtt(s8z24_mt->region->bo); +} - /* Copy potentially with the blitter: - */ - intel_miptree_get_image_offset(src_mt, level, face, slice, - &src_x, &src_y); - - DBG("validate blit mt %p %d,%d/%d -> mt %p %d,%d/%d (%dx%d)\n", - src_mt, src_x, src_y, src_mt->region->pitch * src_mt->region->cpp, - dst_mt, dst_x, dst_y, dst_mt->region->pitch * dst_mt->region->cpp, - width, height); - - if (!intelEmitCopyBlit(intel, - dst_mt->region->cpp, - src_mt->region->pitch, src_mt->region->bo, - 0, src_mt->region->tiling, - dst_mt->region->pitch, dst_mt->region->bo, - 0, dst_mt->region->tiling, - src_x, src_y, - dst_x, dst_y, - width, height, - GL_COPY)) { - - fallback_debug("miptree validate blit 
for %s failed\n", - _mesa_get_format_name(intelImage->base.Base.TexFormat)); - dst = intel_region_map(intel, dst_mt->region, GL_MAP_WRITE_BIT); - src = intel_region_map(intel, src_mt->region, GL_MAP_READ_BIT); - - _mesa_copy_rect(dst, - dst_mt->cpp, - dst_mt->region->pitch, - dst_x, dst_y, - width, height, - src, src_mt->region->pitch, - src_x, src_y); - - intel_region_unmap(intel, dst_mt->region); - intel_region_unmap(intel, src_mt->region); +void +intel_miptree_s8z24_scatter(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + intel_miptree_s8z24_scattergather(intel, mt, level, layer, true); +} + +void +intel_miptree_s8z24_gather(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + intel_miptree_s8z24_scattergather(intel, mt, level, layer, false); +} + +bool +intel_miptree_alloc_hiz(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ + assert(mt->hiz_mt == NULL); + mt->hiz_mt = intel_miptree_create(intel, + mt->target, + MESA_FORMAT_X8_Z24, + mt->first_level, + mt->last_level, + mt->width0, + mt->height0, + mt->depth0, + true); + + if (!mt->hiz_mt) + return false; + + /* Mark that all slices need a HiZ resolve. 
*/ + struct intel_resolve_map *head = &mt->hiz_map; + for (int level = mt->first_level; level <= mt->last_level; ++level) { + for (int layer = 0; layer < mt->level[level].depth; ++layer) { + head->next = malloc(sizeof(*head->next)); + head->next->prev = head; + head->next->next = NULL; + head = head->next; + + head->level = level; + head->layer = layer; + head->need = INTEL_NEED_HIZ_RESOLVE; } } - intel_miptree_reference(&intelImage->mt, dst_mt); + return true; +} + +void +intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + intel_miptree_check_level_layer(mt, level, layer); + + if (!mt->hiz_mt) + return; + + intel_resolve_map_set(&mt->hiz_map, + level, layer, INTEL_NEED_HIZ_RESOLVE); +} + + +void +intel_miptree_slice_set_needs_depth_resolve(struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + intel_miptree_check_level_layer(mt, level, layer); + + if (!mt->hiz_mt) + return; + + intel_resolve_map_set(&mt->hiz_map, + level, layer, INTEL_NEED_DEPTH_RESOLVE); +} + +typedef void (*resolve_func_t)(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer); + +static bool +intel_miptree_slice_resolve(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer, + enum intel_need_resolve need, + resolve_func_t func) +{ + intel_miptree_check_level_layer(mt, level, layer); + + struct intel_resolve_map *item = + intel_resolve_map_get(&mt->hiz_map, level, layer); + + if (!item || item->need != need) + return false; + + func(intel, mt, level, layer); + intel_resolve_map_remove(item); + return true; +} + +bool +intel_miptree_slice_resolve_hiz(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + return intel_miptree_slice_resolve(intel, mt, level, layer, + INTEL_NEED_HIZ_RESOLVE, + intel->vtbl.resolve_hiz_slice); +} + +bool +intel_miptree_slice_resolve_depth(struct intel_context 
*intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + return intel_miptree_slice_resolve(intel, mt, level, layer, + INTEL_NEED_DEPTH_RESOLVE, + intel->vtbl.resolve_depth_slice); +} + +static bool +intel_miptree_all_slices_resolve(struct intel_context *intel, + struct intel_mipmap_tree *mt, + enum intel_need_resolve need, + resolve_func_t func) +{ + bool did_resolve = false; + struct intel_resolve_map *i; + + for (i = mt->hiz_map.next; i; i = i->next) { + if (i->need != need) + continue; + func(intel, mt, i->level, i->layer); + intel_resolve_map_remove(i); + did_resolve = true; + } + + return did_resolve; +} + +bool +intel_miptree_all_slices_resolve_hiz(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ + return intel_miptree_all_slices_resolve(intel, mt, + INTEL_NEED_HIZ_RESOLVE, + intel->vtbl.resolve_hiz_slice); +} + +bool +intel_miptree_all_slices_resolve_depth(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ + return intel_miptree_all_slices_resolve(intel, mt, + INTEL_NEED_DEPTH_RESOLVE, + intel->vtbl.resolve_depth_slice); } diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h index e29b943..fda704b 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h @@ -28,7 +28,10 @@ #ifndef INTEL_MIPMAP_TREE_H #define INTEL_MIPMAP_TREE_H +#include <assert.h> + #include "intel_regions.h" +#include "intel_resolve_map.h" /* A layer on top of the intel_regions code which adds: * @@ -56,6 +59,7 @@ * temporary system buffers. */ +struct intel_resolve_map; struct intel_texture_image; /** @@ -69,22 +73,44 @@ struct intel_mipmap_level GLuint level_y; GLuint width; GLuint height; - /** Depth of the mipmap at this level: 1 for 1D/2D/CUBE, n for 3D. */ + + /** + * \brief Number of 2D slices in this miplevel. 
+ * + * The exact semantics of depth varies according to the texture target: + * - For GL_TEXTURE_CUBE_MAP, depth is 6. + * - For GL_TEXTURE_2D_ARRAY, depth is the number of array slices. It is + * identical for all miplevels in the texture. + * - For GL_TEXTURE_3D, it is the texture's depth at this miplevel. Its + * value, like width and height, varies with miplevel. + * - For other texture types, depth is 1. + */ GLuint depth; - /** Number of images at this level: 1 for 1D/2D, 6 for CUBE, depth for 3D */ - GLuint nr_images; - /** @{ - * offsets from level_[xy] to the image for each cube face or depth - * level. + /** + * \brief List of 2D images in this mipmap level. * - * Pretty much have to accept that hardware formats - * are going to be so diverse that there is no unified way to - * compute the offsets of depth/cube images within a mipmap level, - * so have to store them as a lookup table. + * This may be a list of cube faces, array slices in 2D array texture, or + * layers in a 3D texture. The list's length is \c depth. */ - GLuint *x_offset, *y_offset; - /** @} */ + struct intel_mipmap_slice { + /** + * \name Offset to slice + * \{ + * + * Hardware formats are so diverse that there is no unified way to + * compute the slice offsets, so we store them in this table. + * + * The (x, y) offset to slice \c s at level \c l relative to the miptree's + * base address is + * \code + * x = mt->level[l].slice[s].x_offset + * y = mt->level[l].slice[s].y_offset + */ + GLuint x_offset; + GLuint y_offset; + /** \} */ + } *slice; }; struct intel_mipmap_tree @@ -94,6 +120,13 @@ struct intel_mipmap_tree GLenum target; gl_format format; + /** + * The X offset of each image in the miptree must be aligned to this. See + * the "Alignment Unit Size" section of the BSpec.
+ */ + unsigned int align_w; + unsigned int align_h; /**< \see align_w */ + GLuint first_level; GLuint last_level; @@ -115,18 +148,37 @@ struct intel_mipmap_tree struct intel_region *region; /** - * This points to an auxillary hiz region if all of the following hold: - * 1. The texture has been attached to an FBO as a depthbuffer. - * 2. The texture format is hiz compatible. - * 3. The intel context supports hiz. + * \brief HiZ miptree + * + * This is non-null only if HiZ is enabled for this miptree. + * + * \see intel_miptree_alloc_hiz() + */ + struct intel_mipmap_tree *hiz_mt; + + /** + * \brief Map of miptree slices to needed resolves. * - * When a texture is attached to multiple FBO's, a separate renderbuffer - * wrapper is created for each attachment. This necessitates storing the - * hiz region in the texture itself instead of the renderbuffer wrapper. + * This is used only when the miptree has a child HiZ miptree. * - * \see intel_fbo.c:intel_wrap_texture() + * Let \c mt be a depth miptree with HiZ enabled. Then the resolve map is + * \c mt->hiz_map. The resolve map of the child HiZ miptree, \c + * mt->hiz_mt->hiz_map, is unused. */ - struct intel_region *hiz_region; + struct intel_resolve_map hiz_map; + + /** + * \brief Stencil miptree for depthstencil textures. + * + * This miptree is used for depthstencil textures that require separate + * stencil. The stencil miptree's data is the golden copy of the + * parent miptree's stencil bits. When necessary, we scatter/gather the + * stencil bits between the parent miptree and the stencil miptree. + * + * \see intel_miptree_s8z24_scatter() + * \see intel_miptree_s8z24_gather() + */ + struct intel_mipmap_tree *stencil_mt; /* These are also refcounted: */ @@ -151,6 +203,32 @@ intel_miptree_create_for_region(struct intel_context *intel, gl_format format, struct intel_region *region); +/** + * Create a miptree appropriate as the storage for a non-texture renderbuffer. 
+ * The miptree has the following properties: + * - The target is GL_TEXTURE_2D. + * - There are no levels other than the base level 0. + * - Depth is 1. + */ +struct intel_mipmap_tree* +intel_miptree_create_for_renderbuffer(struct intel_context *intel, + gl_format format, + uint32_t tiling, + uint32_t cpp, + uint32_t width, + uint32_t height); + +/** \brief Assert that the level and layer are valid for the miptree. */ +static inline void +intel_miptree_check_level_layer(struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + assert(level >= mt->first_level); + assert(level <= mt->last_level); + assert(layer < mt->level[level].depth); +} + int intel_miptree_pitch_align (struct intel_context *intel, struct intel_mipmap_tree *mt, uint32_t tiling, @@ -177,7 +255,6 @@ intel_miptree_get_dimensions_for_image(struct gl_texture_image *image, void intel_miptree_set_level_info(struct intel_mipmap_tree *mt, GLuint level, - GLuint nr_images, GLuint x, GLuint y, GLuint w, GLuint h, GLuint d); @@ -190,6 +267,96 @@ intel_miptree_copy_teximage(struct intel_context *intel, struct intel_texture_image *intelImage, struct intel_mipmap_tree *dst_mt); +/** + * Copy the stencil data from \c mt->region to \c mt->stencil_mt->region for + * the given miptree slice. + * + * \see intel_mipmap_tree::stencil_mt + */ +void +intel_miptree_s8z24_scatter(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t slice); + +/** + * Copy the stencil data in \c mt->stencil_mt->region to \c mt->region for the + * given miptree slice. + * + * \see intel_mipmap_tree::stencil_mt + */ +void +intel_miptree_s8z24_gather(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer); + +/** + * \name Miptree HiZ functions + * \{ + * + * It is safe to call the "slice_set_need_resolve" and "slice_resolve" + * functions on a miptree without HiZ. In that case, each function is a no-op.
+ */ + +/** + * \brief Allocate the miptree's embedded HiZ miptree. + * \see intel_mipmap_tree:hiz_mt + * \return false if allocation failed + */ + +bool +intel_miptree_alloc_hiz(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +void +intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t depth); +void +intel_miptree_slice_set_needs_depth_resolve(struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t depth); +void +intel_miptree_all_slices_set_need_hiz_resolve(struct intel_mipmap_tree *mt); + +void +intel_miptree_all_slices_set_need_depth_resolve(struct intel_mipmap_tree *mt); + +/** + * \return false if no resolve was needed + */ +bool +intel_miptree_slice_resolve_hiz(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int depth); + +/** + * \return false if no resolve was needed + */ +bool +intel_miptree_slice_resolve_depth(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int depth); + +/** + * \return false if no resolve was needed + */ +bool +intel_miptree_all_slices_resolve_hiz(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +/** + * \return false if no resolve was needed + */ +bool +intel_miptree_all_slices_resolve_depth(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +/**\}*/ + /* i915_mipmap_tree.c: */ void i915_miptree_layout(struct intel_mipmap_tree *mt); diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c index 89d5c51..2682e15 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c @@ -34,6 +34,7 @@ #include "intel_context.h" #include "intel_buffers.h" +#include "intel_mipmap_tree.h" #include "intel_regions.h" #include "intel_pixel.h" #include "intel_fbo.h" @@ -188,8 +189,8 @@ do_blit_copypixels(struct gl_context * ctx, dsty += draw_irb->draw_y; if 
(!intel_region_copy(intel, - draw_irb->region, 0, dstx, dsty, - read_irb->region, 0, srcx, srcy, + draw_irb->mt->region, 0, dstx, dsty, + read_irb->mt->region, 0, srcx, srcy, width, height, flip, ctx->Color.ColorLogicOpEnabled ? ctx->Color.LogicOp : GL_COPY)) { diff --git a/src/mesa/drivers/dri/intel/intel_resolve_map.c b/src/mesa/drivers/dri/intel/intel_resolve_map.c new file mode 100644 index 0000000..e7d82fa --- /dev/null +++ b/src/mesa/drivers/dri/intel/intel_resolve_map.c @@ -0,0 +1,111 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "intel_resolve_map.h" + +#include <assert.h> +#include <stdlib.h> + +/** + * \brief Set that the miptree slice at (level, layer) needs a resolve. + * + * \pre If a map element already exists with the given key, then + * the new and existing element value must be identical. 
+ */ +void +intel_resolve_map_set(struct intel_resolve_map *head, + uint32_t level, + uint32_t layer, + enum intel_need_resolve need) +{ + struct intel_resolve_map **tail = &head->next; + struct intel_resolve_map *prev = head; + + while (*tail) { + if ((*tail)->level == level && (*tail)->layer == layer) { + assert((*tail)->need == need); + return; + } + prev = *tail; + tail = &(*tail)->next; + } + + *tail = malloc(sizeof(**tail)); + (*tail)->prev = prev; + (*tail)->next = NULL; + (*tail)->level = level; + (*tail)->layer = layer; + (*tail)->need = need; +} + +/** + * \brief Get an element from the map. + * \return null if element is not contained in map. + */ +struct intel_resolve_map* +intel_resolve_map_get(struct intel_resolve_map *head, + uint32_t level, + uint32_t layer) +{ + struct intel_resolve_map *item = head->next; + + while (item) { + if (item->level == level && item->layer == layer) + break; + else + item = item->next; + } + + return item; +} + +/** + * \brief Remove and free an element from the map. + */ +void +intel_resolve_map_remove(struct intel_resolve_map *elem) +{ + if (elem->prev) + elem->prev->next = elem->next; + if (elem->next) + elem->next->prev = elem->prev; + free(elem); +} + +/** + * \brief Remove and free all elements of the map. 
+ */ +void +intel_resolve_map_clear(struct intel_resolve_map *head) +{ + struct intel_resolve_map *next = head->next; + struct intel_resolve_map *trash; + + while (next) { + trash = next; + next = next->next; + free(trash); + } + + head->next = NULL; +} diff --git a/src/mesa/drivers/dri/intel/intel_resolve_map.h b/src/mesa/drivers/dri/intel/intel_resolve_map.h new file mode 100644 index 0000000..1082c10 --- /dev/null +++ b/src/mesa/drivers/dri/intel/intel_resolve_map.h @@ -0,0 +1,83 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include <stdint.h> + +enum intel_need_resolve { + INTEL_NEED_HIZ_RESOLVE, + INTEL_NEED_DEPTH_RESOLVE, +}; + +/** + * \brief Map of miptree slices to needed resolves. + * + * The map is implemented as a linear doubly-linked list. 
+ * + * In the intel_resolve_map*() functions, the \c head argument is not + * inspected for its data. It only serves as an anchor for the list. + * + * \par Design Discussion + * + * There are two possible ways to record which miptree slices need + * resolves. 1) Maintain a flag for every miptree slice in the texture, + * likely in intel_mipmap_level::slice, or 2) maintain a list of only + * those slices that need a resolve. + * + * Immediately before drawing, a full depth resolve is performed on each + * enabled depth texture. If design 1 were chosen, then at each draw call + * it would be necessary to iterate over each miptree slice of each + * enabled depth texture in order to query if each slice needed a resolve. + * In the worst case, this would require 2^16 iterations: 16 texture + * units, 16 miplevels, and 256 depth layers (assuming maximums for OpenGL + * 2.1). + * + * By choosing design 2, the number of iterations is exactly the minimum + * necessary. + */ +struct intel_resolve_map { + uint32_t level; + uint32_t layer; + enum intel_need_resolve need; + + struct intel_resolve_map *next; + struct intel_resolve_map *prev; +}; + +void +intel_resolve_map_set(struct intel_resolve_map *head, + uint32_t level, + uint32_t layer, + enum intel_need_resolve need); + +struct intel_resolve_map* +intel_resolve_map_get(struct intel_resolve_map *head, + uint32_t level, + uint32_t layer); + +void +intel_resolve_map_remove(struct intel_resolve_map *elem); + +void +intel_resolve_map_clear(struct intel_resolve_map *head); diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 5a73030..46b822c 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -86,6 +86,7 @@ const GLuint __driNConfigOptions = 11; #include "intel_bufmgr.h" #include "intel_chipset.h" #include "intel_fbo.h" +#include "intel_mipmap_tree.h" #include "intel_screen.h" #include "intel_tex.h" #include
"intel_regions.h" @@ -201,7 +202,7 @@ intel_create_image_from_renderbuffer(__DRIcontext *context, image->format = rb->Format; image->data_type = rb->DataType; image->data = loaderPrivate; - intel_region_reference(&image->region, irb->region); + intel_region_reference(&image->region, irb->mt->region); return image; } @@ -679,13 +680,9 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) intelScreen->gen = 2; } - /* - * FIXME: The hiz and separate stencil fields need updating once the - * FIXME: features are completely implemented for a given chipset. - */ - intelScreen->hw_has_separate_stencil = intelScreen->gen >= 7; + intelScreen->hw_has_separate_stencil = intelScreen->gen >= 6; intelScreen->hw_must_use_separate_stencil = intelScreen->gen >= 7; - intelScreen->hw_has_hiz = false; + intelScreen->hw_has_hiz = intelScreen->gen == 6; /* Not yet for gen7. */ intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_UNKNOWN; intel_override_hiz(intelScreen); diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 31f2828..042bc30 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -39,6 +39,7 @@ #include "intel_buffers.h" #include "intel_fbo.h" +#include "intel_mipmap_tree.h" #include "intel_screen.h" #include "intel_span.h" #include "intel_regions.h" @@ -58,7 +59,7 @@ intel_set_span_functions(struct intel_context *intel, int minx = 0, miny = 0; \ int maxx = rb->Width; \ int maxy = rb->Height; \ - int pitch = rb->RowStride * irb->region->cpp; \ + int pitch = rb->RowStride * irb->mt->region->cpp; \ void *buf = rb->Data; \ GLuint p; \ (void) p; @@ -114,40 +115,6 @@ intel_set_span_functions(struct intel_context *intel, #define TAG2(x,y) intel_##x##y##_A8 #include "spantmp2.h" -/* ------------------------------------------------------------------------- */ -/* s8 stencil span and pixel functions */ -/* ------------------------------------------------------------------------- */ - -/* - * 
HAVE_HW_STENCIL_SPANS determines if stencil buffer read/writes are done with - * memcpy or for loops. Since the stencil buffer is interleaved, memcpy won't - * work. - */ -#define HAVE_HW_STENCIL_SPANS 0 - -#define LOCAL_STENCIL_VARS \ - (void) ctx; \ - int minx = 0; \ - int miny = 0; \ - int maxx = rb->Width; \ - int maxy = rb->Height; \ - \ - /* \ - * Here we ignore rb->Data and rb->RowStride as set by \ - * intelSpanRenderStart. Since intel_offset_S8 decodes the W tile \ - * manually, the region's *real* base address and stride is \ - * required. \ - */ \ - struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ - uint8_t *buf = irb->region->bo->virtual; \ - unsigned stride = irb->region->pitch; \ - bool flip = rb->Name == 0; \ - int y_scale = flip ? -1 : 1; \ - int y_bias = flip ? (rb->Height - 1) : 0; \ - -#undef Y_FLIP -#define Y_FLIP(y) (y_scale * (y) + y_bias) - /** * \brief Get pointer offset into stencil buffer. * @@ -210,13 +177,6 @@ intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y) return u; } -#define WRITE_STENCIL(x, y, src) buf[intel_offset_S8(stride, x, y)] = src; -#define READ_STENCIL(dest, x, y) dest = buf[intel_offset_S8(stride, x, y)] -#define TAG(x) intel_##x##_S8 -#include "stenciltmp.h" - -/* ------------------------------------------------------------------------- */ - void intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb) { @@ -228,10 +188,13 @@ intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb) if (!irb) return; - if (irb->wrapped_depth) - intel_renderbuffer_map(intel, irb->wrapped_depth); - if (irb->wrapped_stencil) - intel_renderbuffer_map(intel, irb->wrapped_stencil); + if (rb->Data) { + /* Renderbuffer is already mapped. This usually happens when a single + * buffer is attached to the framebuffer's depth and stencil attachment + * points. 
+ */ + return; + } ctx->Driver.MapRenderbuffer(ctx, rb, 0, 0, rb->Width, rb->Height, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT, @@ -252,10 +215,13 @@ intel_renderbuffer_unmap(struct intel_context *intel, if (!irb) return; - if (irb->wrapped_depth) - intel_renderbuffer_unmap(intel, irb->wrapped_depth); - if (irb->wrapped_stencil) - intel_renderbuffer_unmap(intel, irb->wrapped_stencil); + if (!rb->Data) { + /* Renderbuffer is already unmapped. This usually happens when a single + * buffer is attached to the framebuffer's depth and stencil attachment + * points. + */ + return; + } ctx->Driver.UnmapRenderbuffer(ctx, rb); @@ -288,28 +254,57 @@ intel_framebuffer_unmap(struct intel_context *intel, struct gl_framebuffer *fb) } /** - * Prepare for software rendering. Map current read/draw framebuffers' - * renderbuffes and all currently bound texture objects. + * Resolve all buffers that will be mapped by intelSpanRenderStart(). * - * Old note: Moved locking out to get reasonable span performance. + * Resolve the depth buffer of each enabled texture and of the read and draw + * buffers. + * + * (Note: In the future this will also perform MSAA resolves.) */ -void -intelSpanRenderStart(struct gl_context * ctx) +static void +intel_span_resolve_buffers(struct intel_context *intel) { - struct intel_context *intel = intel_context(ctx); - GLuint i; - - intel_flush(&intel->ctx); - intel_prepare_render(intel); + struct gl_context *ctx = &intel->ctx; + struct intel_renderbuffer *draw_irb; + struct intel_renderbuffer *read_irb; + struct intel_texture_object *tex_obj; + + /* Resolve depth buffer of each enabled texture. 
*/ + for (int i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { + if (!ctx->Texture.Unit[i]._ReallyEnabled) + continue; + tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current); + intel_finalize_mipmap_tree(intel, i); + if (!tex_obj || !tex_obj->mt) + continue; + intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt); + } - for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled) { - struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current; + /* Resolve each attached depth buffer. */ + draw_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); + read_irb = intel_get_renderbuffer(ctx->ReadBuffer, BUFFER_DEPTH); + if (draw_irb) + intel_renderbuffer_resolve_depth(intel, draw_irb); + if (read_irb != draw_irb && read_irb) + intel_renderbuffer_resolve_depth(intel, read_irb); +} - intel_finalize_mipmap_tree(intel, i); - intel_tex_map_images(intel, intel_texture_object(texObj), - GL_MAP_READ_BIT | GL_MAP_WRITE_BIT); - } +/** + * Map the regions needed by intelSpanRenderStart(). + */ +static void +intel_span_map_buffers(struct intel_context *intel) +{ + struct gl_context *ctx = &intel->ctx; + struct intel_texture_object *tex_obj; + + for (int i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { + if (!ctx->Texture.Unit[i]._ReallyEnabled) + continue; + tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current); + intel_finalize_mipmap_tree(intel, i); + intel_tex_map_images(intel, tex_obj, + GL_MAP_READ_BIT | GL_MAP_WRITE_BIT); } intel_framebuffer_map(intel, ctx->DrawBuffer); @@ -319,6 +314,24 @@ intelSpanRenderStart(struct gl_context * ctx) } /** + * Prepare for software rendering. Map current read/draw framebuffers' + * renderbuffers and all currently bound texture objects. + * + * Old note: Moved locking out to get reasonable span performance.
+ */ +void +intelSpanRenderStart(struct gl_context * ctx) +{ + struct intel_context *intel = intel_context(ctx); + + intel_flush(ctx); + intel_prepare_render(intel); + intel_span_resolve_buffers(intel); + intel_flush(ctx); + intel_span_map_buffers(intel); +} + +/** * Called when done software rendering. Unmap the buffers we mapped in * the above function. */ @@ -405,7 +418,7 @@ static span_init_func intel_span_init_funcs[MESA_FORMAT_COUNT] = [MESA_FORMAT_Z16] = _mesa_set_renderbuffer_accessors, [MESA_FORMAT_X8_Z24] = _mesa_set_renderbuffer_accessors, [MESA_FORMAT_S8_Z24] = _mesa_set_renderbuffer_accessors, - [MESA_FORMAT_S8] = intel_InitStencilPointers_S8, + [MESA_FORMAT_S8] = _mesa_set_renderbuffer_accessors, [MESA_FORMAT_R8] = _mesa_set_renderbuffer_accessors, [MESA_FORMAT_RG88] = _mesa_set_renderbuffer_accessors, [MESA_FORMAT_R16] = _mesa_set_renderbuffer_accessors, diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c index 05b0748..9884a57 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.c +++ b/src/mesa/drivers/dri/intel/intel_tex.c @@ -69,11 +69,6 @@ intel_alloc_texture_image_buffer(struct gl_context *ctx, */ ctx->Driver.FreeTextureImageBuffer(ctx, image); - if (intel->must_use_separate_stencil - && image->TexFormat == MESA_FORMAT_S8_Z24) { - intel_tex_image_s8z24_create_renderbuffers(intel, intel_image); - } - /* Allocate the swrast_texture_image::ImageOffsets array now */ switch (texobj->Target) { case GL_TEXTURE_3D: @@ -107,11 +102,6 @@ intel_alloc_texture_image_buffer(struct gl_context *ctx, */ intel_miptree_reference(&intel_texobj->mt, intel_image->mt); - if (intel->must_use_separate_stencil - && image->TexFormat == MESA_FORMAT_S8_Z24) { - intel_tex_image_s8z24_create_renderbuffers(intel, intel_image); - } - DBG("%s: alloc obj %p level %d %dx%dx%d using new miptree %p\n", __FUNCTION__, texobj, image->Level, width, height, depth, intel_image->mt); @@ -139,9 +129,6 @@ intel_free_texture_image_buffer(struct 
gl_context * ctx, free(intelImage->base.ImageOffsets); intelImage->base.ImageOffsets = NULL; } - - _mesa_reference_renderbuffer(&intelImage->depth_rb, NULL); - _mesa_reference_renderbuffer(&intelImage->stencil_rb, NULL); } /** @@ -174,13 +161,19 @@ intel_map_texture_image(struct gl_context *ctx, assert(tex_image->TexObject->Target != GL_TEXTURE_1D_ARRAY || h == 1); - if (intel_image->stencil_rb) { - /* - * The texture has packed depth/stencil format, but uses separate - * stencil. The texture's embedded stencil buffer contains the real - * stencil data, so copy that into the miptree. + if (mt->stencil_mt) { + /* The miptree has depthstencil format, but uses separate stencil. The + * embedded stencil miptree contains the real stencil data, so gather + * that into the depthstencil miptree. + * + * FIXME: Avoid the gather if the texture is mapped as write-only. */ - intel_tex_image_s8z24_gather(intel, intel_image); + intel_miptree_s8z24_gather(intel, mt, tex_image->Level, slice); + } + + intel_miptree_slice_resolve_depth(intel, mt, tex_image->Level, slice); + if (mode & GL_MAP_WRITE_BIT) { + intel_miptree_slice_set_needs_hiz_resolve(mt, tex_image->Level, slice); } /* For compressed formats, the stride is the number of bytes per @@ -211,16 +204,19 @@ intel_unmap_texture_image(struct gl_context *ctx, { struct intel_context *intel = intel_context(ctx); struct intel_texture_image *intel_image = intel_texture_image(tex_image); + struct intel_mipmap_tree *mt = intel_image->mt; intel_region_unmap(intel, intel_image->mt->region); - if (intel_image->stencil_rb) { - /* - * The texture has packed depth/stencil format, but uses separate - * stencil. The texture's embedded stencil buffer contains the real - * stencil data, so copy that into the miptree. + if (mt->stencil_mt) { + /* The miptree has depthstencil format, but uses separate stencil. 
The + * embedded stencil miptree must contain the real stencil data after + * unmapping, so copy it from the depthstencil miptree into the stencil + * miptree. + * + * FIXME: Avoid the scatter if the texture was mapped as read-only. */ - intel_tex_image_s8z24_scatter(intel, intel_image); + intel_miptree_s8z24_scatter(intel, mt, tex_image->Level, slice); } } diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 2df4ef6..543326a 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -71,6 +71,7 @@ intel_copy_texsubimage(struct intel_context *intel, { struct gl_context *ctx = &intel->ctx; struct intel_renderbuffer *irb; + struct intel_region *region; const GLenum internalFormat = intelImage->base.Base.InternalFormat; bool copy_supported = false; bool copy_supported_with_alpha_override = false; @@ -78,11 +79,14 @@ intel_copy_texsubimage(struct intel_context *intel, intel_prepare_render(intel); irb = get_teximage_readbuffer(intel, internalFormat); - if (!intelImage->mt || !irb || !irb->region) { + if (!intelImage->mt || !irb || !irb->mt) { if (unlikely(INTEL_DEBUG & DEBUG_FALLBACKS)) fprintf(stderr, "%s fail %p %p (0x%08x)\n", __FUNCTION__, intelImage->mt, irb, internalFormat); return false; + } else { + region = irb->mt->region; + assert(region); } copy_supported = intelImage->base.Base.TexFormat == irb->Base.Format; @@ -127,19 +131,19 @@ intel_copy_texsubimage(struct intel_context *intel, if (ctx->ReadBuffer->Name == 0) { /* Flip vertical orientation for system framebuffers */ y = ctx->ReadBuffer->Height - (y + height); - src_pitch = -irb->region->pitch; + src_pitch = -region->pitch; } else { /* reading from a FBO, y is already oriented the way we like */ - src_pitch = irb->region->pitch; + src_pitch = region->pitch; } /* blit from src buffer to texture */ if (!intelEmitCopyBlit(intel, intelImage->mt->cpp, src_pitch, - irb->region->bo, + region->bo, 0, - 
irb->region->tiling, + region->tiling, intelImage->mt->region->pitch, intelImage->mt->region->bo, 0, diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 3fc2128..dd0c6d3 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -197,134 +197,6 @@ try_pbo_upload(struct gl_context *ctx, return true; } -/** - * \param scatter Scatter if true. Gather if false. - * - * \see intel_tex_image_x8z24_scatter - * \see intel_tex_image_x8z24_gather - */ -static void -intel_tex_image_s8z24_scattergather(struct intel_context *intel, - struct intel_texture_image *intel_image, - bool scatter) -{ - struct gl_context *ctx = &intel->ctx; - struct gl_renderbuffer *depth_rb = intel_image->depth_rb; - struct gl_renderbuffer *stencil_rb = intel_image->stencil_rb; - int w, h, d; - - intel_miptree_get_dimensions_for_image(&intel_image->base.Base, &w, &h, &d); - assert(d == 1); /* FINISHME */ - - uint32_t depth_row[w]; - uint8_t stencil_row[w]; - - intel_renderbuffer_map(intel, depth_rb); - intel_renderbuffer_map(intel, stencil_rb); - - if (scatter) { - for (int y = 0; y < h; ++y) { - depth_rb->GetRow(ctx, depth_rb, w, 0, y, depth_row); - for (int x = 0; x < w; ++x) { - stencil_row[x] = depth_row[x] >> 24; - } - stencil_rb->PutRow(ctx, stencil_rb, w, 0, y, stencil_row, NULL); - } - } else { /* gather */ - for (int y = 0; y < h; ++y) { - depth_rb->GetRow(ctx, depth_rb, w, 0, y, depth_row); - stencil_rb->GetRow(ctx, stencil_rb, w, 0, y, stencil_row); - for (int x = 0; x < w; ++x) { - uint32_t s8_x24 = stencil_row[x] << 24; - uint32_t x8_z24 = depth_row[x] & 0x00ffffff; - depth_row[x] = s8_x24 | x8_z24; - } - depth_rb->PutRow(ctx, depth_rb, w, 0, y, depth_row, NULL); - } - } - - intel_renderbuffer_unmap(intel, depth_rb); - intel_renderbuffer_unmap(intel, stencil_rb); -} - -/** - * Copy the x8 bits from intel_image->depth_rb to intel_image->stencil_rb. 
- */ -void -intel_tex_image_s8z24_scatter(struct intel_context *intel, - struct intel_texture_image *intel_image) -{ - intel_tex_image_s8z24_scattergather(intel, intel_image, true); -} - -/** - * Copy the data in intel_image->stencil_rb to the x8 bits in - * intel_image->depth_rb. - */ -void -intel_tex_image_s8z24_gather(struct intel_context *intel, - struct intel_texture_image *intel_image) -{ - intel_tex_image_s8z24_scattergather(intel, intel_image, false); -} - -bool -intel_tex_image_s8z24_create_renderbuffers(struct intel_context *intel, - struct intel_texture_image *image) -{ - struct gl_context *ctx = &intel->ctx; - bool ok = true; - int width, height, depth; - struct gl_renderbuffer *drb; - struct gl_renderbuffer *srb; - struct intel_renderbuffer *idrb; - struct intel_renderbuffer *isrb; - - intel_miptree_get_dimensions_for_image(&image->base.Base, - &width, &height, &depth); - assert(depth == 1); /* FINISHME */ - - assert(intel->has_separate_stencil); - assert(image->base.Base.TexFormat == MESA_FORMAT_S8_Z24); - assert(image->mt != NULL); - - drb = intel_create_wrapped_renderbuffer(ctx, width, height, - MESA_FORMAT_X8_Z24); - srb = intel_create_wrapped_renderbuffer(ctx, width, height, - MESA_FORMAT_S8); - - if (!drb || !srb) { - if (drb) { - drb->Delete(drb); - } - if (srb) { - srb->Delete(srb); - } - return false; - } - - idrb = intel_renderbuffer(drb); - isrb = intel_renderbuffer(srb); - - intel_region_reference(&idrb->region, image->mt->region); - ok = intel_alloc_renderbuffer_storage(ctx, srb, GL_STENCIL_INDEX8, - width, height); - - if (!ok) { - drb->Delete(drb); - srb->Delete(srb); - return false; - } - - intel_renderbuffer_set_draw_offset(idrb, image, 0); - intel_renderbuffer_set_draw_offset(isrb, image, 0); - - _mesa_reference_renderbuffer(&image->depth_rb, drb); - _mesa_reference_renderbuffer(&image->stencil_rb, srb); - - return true; -} - static void intelTexImage(struct gl_context * ctx, GLint dims, @@ -471,7 +343,7 @@ 
intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, /* If the region isn't set, then intel_update_renderbuffers was unable * to get the buffers for the drawable. */ - if (rb->region == NULL) + if (!rb || !rb->mt) return; if (texture_format == __DRI_TEXTURE_FORMAT_RGB) { @@ -485,7 +357,7 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, _mesa_lock_texture(&intel->ctx, texObj); texImage = _mesa_get_tex_image(ctx, texObj, target, level); - intel_set_texture_image_region(ctx, texImage, rb->region, target, + intel_set_texture_image_region(ctx, texImage, rb->mt->region, target, internalFormat, texFormat); _mesa_unlock_texture(&intel->ctx, texObj); } diff --git a/src/mesa/drivers/dri/intel/intel_tex_layout.c b/src/mesa/drivers/dri/intel/intel_tex_layout.c index 64f4a70..65645bc 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_layout.c +++ b/src/mesa/drivers/dri/intel/intel_tex_layout.c @@ -33,26 +33,113 @@ #include "intel_mipmap_tree.h" #include "intel_tex_layout.h" #include "intel_context.h" + +#include "main/image.h" #include "main/macros.h" +static unsigned int +intel_horizontal_texture_alignment_unit(struct intel_context *intel, + gl_format format) +{ + /** + * From the "Alignment Unit Size" section of various specs, namely: + * - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4 + * - i965 and G45 PRMs: Volume 1, Section 6.17.3.4. 
+ * - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4 + * - BSpec (for Ivybridge and slight variations in separate stencil) + * + * +----------------------------------------------------------------------+ + * | | alignment unit width ("i") | + * | Surface Property |-----------------------------| + * | | 915 | 965 | ILK | SNB | IVB | + * +----------------------------------------------------------------------+ + * | YUV 4:2:2 format | 8 | 4 | 4 | 4 | 4 | + * | BC1-5 compressed format (DXTn/S3TC) | 4 | 4 | 4 | 4 | 4 | + * | FXT1 compressed format | 8 | 8 | 8 | 8 | 8 | + * | Depth Buffer (16-bit) | 4 | 4 | 4 | 4 | 8 | + * | Depth Buffer (other) | 4 | 4 | 4 | 4 | 4 | + * | Separate Stencil Buffer | N/A | N/A | 8 | 8 | 8 | + * | All Others | 4 | 4 | 4 | 4 | 4 | + * +----------------------------------------------------------------------+ + * + * On IVB+, non-special cases can be overridden by setting the SURFACE_STATE + * "Surface Horizontal Alignment" field to HALIGN_4 or HALIGN_8. + */ + if (_mesa_is_format_compressed(format)) { + /* The hardware alignment requirements for compressed textures + * happen to match the block boundaries. + */ + unsigned int i, j; + _mesa_get_format_block_size(format, &i, &j); + return i; + } + + if (format == MESA_FORMAT_S8) + return 8; + + if (intel->gen >= 7 && format == MESA_FORMAT_Z16) + return 8; + + return 4; +} + +static unsigned int +intel_vertical_texture_alignment_unit(struct intel_context *intel, + gl_format format) +{ + /** + * From the "Alignment Unit Size" section of various specs, namely: + * - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4 + * - i965 and G45 PRMs: Volume 1, Section 6.17.3.4. 
+ * - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4 + * - BSpec (for Ivybridge and slight variations in separate stencil) + * + * +----------------------------------------------------------------------+ + * | | alignment unit height ("j") | + * | Surface Property |-----------------------------| + * | | 915 | 965 | ILK | SNB | IVB | + * +----------------------------------------------------------------------+ + * | BC1-5 compressed format (DXTn/S3TC) | 4 | 4 | 4 | 4 | 4 | + * | FXT1 compressed format | 4 | 4 | 4 | 4 | 4 | + * | Depth Buffer | 2 | 2 | 2 | 4 | 4 | + * | Separate Stencil Buffer | N/A | N/A | N/A | 4 | 8 | + * | Multisampled (4x or 8x) render target | N/A | N/A | N/A | 4 | 4 | + * | All Others | 2 | 2 | 2 | 2 | 2 | + * +----------------------------------------------------------------------+ + * + * On SNB+, non-special cases can be overridden by setting the SURFACE_STATE + * "Surface Vertical Alignment" field to VALIGN_2 or VALIGN_4. + * + * We currently don't support multisampling. + */ + if (_mesa_is_format_compressed(format)) + return 4; + + if (format == MESA_FORMAT_S8) + return intel->gen >= 7 ? 8 : 4; + + GLenum base_format = _mesa_get_format_base_format(format); + + if (intel->gen >= 6 && + (base_format == GL_DEPTH_COMPONENT || + base_format == GL_DEPTH_STENCIL)) { + return 4; + } + + return 2; +} + void -intel_get_texture_alignment_unit(gl_format format, +intel_get_texture_alignment_unit(struct intel_context *intel, + gl_format format, unsigned int *w, unsigned int *h) { - if (_mesa_is_format_compressed(format)) { - /* The hardware alignment requirements for compressed textures - * happen to match the block boundaries. 
- */ - _mesa_get_format_block_size(format, w, h); - } else { - *w = 4; - *h = 2; - } + *w = intel_horizontal_texture_alignment_unit(intel, format); + *h = intel_vertical_texture_alignment_unit(intel, format); } -void i945_miptree_layout_2d(struct intel_mipmap_tree *mt, int nr_images) +void i945_miptree_layout_2d(struct intel_mipmap_tree *mt) { - GLuint align_h, align_w; GLuint level; GLuint x = 0; GLuint y = 0; @@ -61,10 +148,9 @@ void i945_miptree_layout_2d(struct intel_mipmap_tree *mt, int nr_images) GLuint depth = mt->depth0; /* number of array layers. */ mt->total_width = mt->width0; - intel_get_texture_alignment_unit(mt->format, &align_w, &align_h); if (mt->compressed) { - mt->total_width = ALIGN(mt->width0, align_w); + mt->total_width = ALIGN(mt->width0, mt->align_w); } /* May need to adjust width to accomodate the placement of @@ -76,10 +162,10 @@ void i945_miptree_layout_2d(struct intel_mipmap_tree *mt, int nr_images) GLuint mip1_width; if (mt->compressed) { - mip1_width = ALIGN(minify(mt->width0), align_w) - + ALIGN(minify(minify(mt->width0)), align_w); + mip1_width = ALIGN(minify(mt->width0), mt->align_w) + + ALIGN(minify(minify(mt->width0)), mt->align_w); } else { - mip1_width = ALIGN(minify(mt->width0), align_w) + mip1_width = ALIGN(minify(mt->width0), mt->align_w) + minify(minify(mt->width0)); } @@ -93,12 +179,12 @@ void i945_miptree_layout_2d(struct intel_mipmap_tree *mt, int nr_images) for ( level = mt->first_level ; level <= mt->last_level ; level++ ) { GLuint img_height; - intel_miptree_set_level_info(mt, level, nr_images, x, y, width, + intel_miptree_set_level_info(mt, level, x, y, width, height, depth); - img_height = ALIGN(height, align_h); + img_height = ALIGN(height, mt->align_h); if (mt->compressed) - img_height /= align_h; + img_height /= mt->align_h; /* Because the images are packed better, the final offset * might not be the maximal one: @@ -108,7 +194,7 @@ void i945_miptree_layout_2d(struct intel_mipmap_tree *mt, int nr_images) /* 
Layout_below: step right after second mipmap. */ if (level == mt->first_level + 1) { - x += ALIGN(width, align_w); + x += ALIGN(width, mt->align_w); } else { y += img_height; diff --git a/src/mesa/drivers/dri/intel/intel_tex_layout.h b/src/mesa/drivers/dri/intel/intel_tex_layout.h index 257c07c..12ed16d 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_layout.h +++ b/src/mesa/drivers/dri/intel/intel_tex_layout.h @@ -38,7 +38,9 @@ static INLINE GLuint minify( GLuint d ) return MAX2(1, d>>1); } -extern void i945_miptree_layout_2d(struct intel_mipmap_tree *mt, - int nr_images); -void intel_get_texture_alignment_unit(gl_format format, - unsigned int *w, unsigned int *h); +extern void i945_miptree_layout_2d(struct intel_mipmap_tree *mt); + +void +intel_get_texture_alignment_unit(struct intel_context *intel, + gl_format format, + unsigned int *w, unsigned int *h); diff --git a/src/mesa/drivers/dri/intel/intel_tex_obj.h b/src/mesa/drivers/dri/intel/intel_tex_obj.h index 06904e4..8b278ba 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_obj.h +++ b/src/mesa/drivers/dri/intel/intel_tex_obj.h @@ -66,36 +66,6 @@ struct intel_texture_image */ struct intel_mipmap_tree *mt; bool used_as_render_target; - - /** - * \name Renderbuffers for faking packed depth/stencil - * - * These renderbuffers are non-null only if the intel_context is using - * separate stencil and this texture has a packed depth/stencil format. When - * glFramebufferTexture is called on this image, the resultant renderbuffer - * wrapper reuses these renderbuffers as its own. - * - * \see intel_wrap_texture - * \see intel_tex_image_s8z24_create_renderbuffers - * \see intel_tex_image_s8z24_scatter - * \see intel_tex_image_s8z24_gather - * - * \{ - */ - - /** - * The depth buffer has format X8_Z24. The x8 bits are undefined unless - * intel_tex_image_s8z24_gather has been immediately called. The depth - * buffer reuses the image miptree's region and hiz_region as its own. 
- */ - struct gl_renderbuffer *depth_rb; - - /** - * The stencil buffer has format S8 and keeps its data in its own region. - */ - struct gl_renderbuffer *stencil_rb; - - /** \} */ }; static INLINE struct intel_texture_object * diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c index f4c1a68..748fbdc 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c @@ -154,7 +154,7 @@ intel_tex_map_image_for_swrast(struct intel_context *intel, intel_image->base.Data = intel_region_map(intel, mt->region, mode); } else { - assert(mt->level[level].depth == 1); + assert(intel_image->base.Base.Depth == 1); intel_miptree_get_image_offset(mt, level, face, 0, &x, &y); DBG("%s: (%d,%d) -> (%d, %d)/%d\n", |