diff options
40 files changed, 907 insertions, 2599 deletions
diff --git a/src/gallium/drivers/i965/brw_bo.c b/src/gallium/drivers/i965/brw_bo.c new file mode 100644 index 0000000..e7a4dac --- /dev/null +++ b/src/gallium/drivers/i965/brw_bo.c @@ -0,0 +1,12 @@ + + +void brw_buffer_subdata() +{ + if (intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(bo); + memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); + drm_intel_gem_bo_unmap_gtt(bo); + } else { + dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); + } +} diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c index 1088a7a..9ab5638 100644 --- a/src/gallium/drivers/i965/brw_cc.c +++ b/src/gallium/drivers/i965/brw_cc.c @@ -62,84 +62,21 @@ const struct brw_tracked_state brw_cc_vp = { }; struct brw_cc_unit_key { - GLboolean stencil, stencil_two_side, color_blend, alpha_enabled; - - GLenum stencil_func[2], stencil_fail_op[2]; - GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2]; - GLubyte stencil_ref[2], stencil_write_mask[2], stencil_test_mask[2]; - GLenum logic_op; - - GLenum blend_eq_rgb, blend_eq_a; - GLenum blend_src_rgb, blend_src_a; - GLenum blend_dst_rgb, blend_dst_a; - - GLenum alpha_func; - GLclampf alpha_ref; - - GLboolean dither; - - GLboolean depth_test, depth_write; - GLenum depth_func; + struct pipe_depth_stencil_alpha_state dsa; + struct pipe_blend_state blend; /* no color mask */ }; static void cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) { - GLcontext *ctx = &brw->intel.ctx; - const unsigned back = ctx->Stencil._BackFace; - memset(key, 0, sizeof(*key)); + + key->dsa = brw->curr.dsa.base; + key->blend = brw->curr.blend.base; - key->stencil = ctx->Stencil._Enabled; - key->stencil_two_side = ctx->Stencil._TestTwoSide; - - if (key->stencil) { - key->stencil_func[0] = ctx->Stencil.Function[0]; - key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0]; - key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0]; - key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0]; - key->stencil_ref[0] = ctx->Stencil.Ref[0]; - key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0]; - key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0]; - } - if (key->stencil_two_side) { - key->stencil_func[1] = ctx->Stencil.Function[back]; - key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back]; - key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back]; - key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back]; - key->stencil_ref[1] = ctx->Stencil.Ref[back]; - key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back]; - key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back]; - } - - if (ctx->Color._LogicOpEnabled) - key->logic_op = ctx->Color.LogicOp; - else - key->logic_op = GL_COPY; - - key->color_blend = ctx->Color.BlendEnabled; - if (key->color_blend) { - key->blend_eq_rgb = ctx->Color.BlendEquationRGB; - key->blend_eq_a = ctx->Color.BlendEquationA; - key->blend_src_rgb = ctx->Color.BlendSrcRGB; - key->blend_dst_rgb = ctx->Color.BlendDstRGB; - key->blend_src_a = ctx->Color.BlendSrcA; - key->blend_dst_a = ctx->Color.BlendDstA; - } - - key->alpha_enabled = ctx->Color.AlphaEnabled; - if (key->alpha_enabled) { - key->alpha_func = ctx->Color.AlphaFunc; - key->alpha_ref = ctx->Color.AlphaRef; - } - - key->dither = ctx->Color.DitherFlag; - - key->depth_test = ctx->Depth.Test; - if (key->depth_test) { - key->depth_func = ctx->Depth.Func; - key->depth_write = ctx->Depth.Mask; - } + /* Clear non-respected values: + */ + key->blend.colormask = 0xf; } /** @@ -153,103 +90,16 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) memset(&cc, 0, sizeof(cc)); - /* _NEW_STENCIL */ - if (key->stencil) { - cc.cc0.stencil_enable = 1; - cc.cc0.stencil_func = - intel_translate_compare_func(key->stencil_func[0]); - cc.cc0.stencil_fail_op = - intel_translate_stencil_op(key->stencil_fail_op[0]); - cc.cc0.stencil_pass_depth_fail_op = - intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]); - cc.cc0.stencil_pass_depth_pass_op = - intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]); - cc.cc1.stencil_ref = key->stencil_ref[0]; - cc.cc1.stencil_write_mask = key->stencil_write_mask[0]; - cc.cc1.stencil_test_mask = key->stencil_test_mask[0]; - - if (key->stencil_two_side) { - cc.cc0.bf_stencil_enable = 1; - cc.cc0.bf_stencil_func = - intel_translate_compare_func(key->stencil_func[1]); - cc.cc0.bf_stencil_fail_op = - intel_translate_stencil_op(key->stencil_fail_op[1]); - cc.cc0.bf_stencil_pass_depth_fail_op = - intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]); - cc.cc0.bf_stencil_pass_depth_pass_op = - intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]); - cc.cc1.bf_stencil_ref = key->stencil_ref[1]; - cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1]; - cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1]; - } - - /* Not really sure about this: - */ - if (key->stencil_write_mask[0] || - (key->stencil_two_side && key->stencil_write_mask[1])) - cc.cc0.stencil_write_enable = 1; - } - - /* _NEW_COLOR */ - if (key->logic_op != GL_COPY) { - cc.cc2.logicop_enable = 1; - cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op); - } else if (key->color_blend) { - GLenum eqRGB = key->blend_eq_rgb; - GLenum eqA = key->blend_eq_a; - GLenum srcRGB = key->blend_src_rgb; - GLenum dstRGB = key->blend_dst_rgb; - GLenum srcA = key->blend_src_a; - GLenum dstA = key->blend_dst_a; - - if (eqRGB == GL_MIN || eqRGB == GL_MAX) { - srcRGB = dstRGB = GL_ONE; - } - - if (eqA == GL_MIN || eqA == GL_MAX) { - srcA = dstA = GL_ONE; - } - - cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); - cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); - cc.cc6.blend_function = brw_translate_blend_equation(eqRGB); - - cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); - cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); - cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA); - - cc.cc3.blend_enable = 1; - cc.cc3.ia_blend_enable = (srcA != srcRGB || - dstA != dstRGB || - eqA != eqRGB); - } - - if (key->alpha_enabled) { - cc.cc3.alpha_test = 1; - cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func); - cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; - - UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref); - } - - if (key->dither) { - cc.cc5.dither_enable = 1; - cc.cc6.y_dither_offset = 0; - cc.cc6.x_dither_offset = 0; - } - - /* _NEW_DEPTH */ - if (key->depth_test) { - cc.cc2.depth_test = 1; - cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func); - cc.cc2.depth_write_enable = key->depth_write; - } + cc.cc0 = brw->dsa.cc0; + cc.cc1 = brw->dsa.cc1; + cc.cc2 = brw->dsa.cc2; + cc.cc3 = brw->dsa.cc3 | brw->blend.cc3; /* CACHE_NEW_CC_VP */ cc.cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */ - if (INTEL_DEBUG & DEBUG_STATS) - cc.cc5.statistics_enable = 1; + cc.cc5 = brw->blend.cc5 | brw->debug.cc5; + bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT, key, sizeof(*key), @@ -286,7 +136,7 @@ static void prepare_cc_unit( struct brw_context *brw ) const struct brw_tracked_state brw_cc_unit = { .dirty = { - .mesa = _NEW_STENCIL | _NEW_COLOR | _NEW_DEPTH, + .mesa = PIPE_NEW_DEPTH_STENCIL_ALPHA | PIPE_NEW_BLEND, .brw = 0, .cache = CACHE_NEW_CC_VP }, diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c index 20a927c..df1b371 100644 --- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -29,9 +29,9 @@ * Keith Whitwell <keith@tungstengraphics.com> */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" +#include "pipe/p_state.h" + +#include "util/u_math.h" #include "intel_batchbuffer.h" @@ -83,7 +83,7 @@ static void compile_clip_prog( struct brw_context *brw, delta += ATTR_SIZE; } - c.nr_attrs = brw_count_bits(c.key.attrs); + c.nr_attrs = util_count_bits(c.key.attrs); if (BRW_IS_IGDNG(brw)) c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ @@ -104,16 +104,16 @@ static void compile_clip_prog( struct brw_context *brw, * do all three: */ switch (key->primitive) { - case GL_TRIANGLES: + case PIPE_PRIM_TRIANGLES: if (key->do_unfilled) brw_emit_unfilled_clip( &c ); else brw_emit_tri_clip( &c ); break; - case GL_LINES: + case PIPE_PRIM_LINES: brw_emit_line_clip( &c ); break; - case GL_POINTS: + case PIPE_PRIM_POINTS: brw_emit_point_clip( &c ); break; default: @@ -143,7 +143,6 @@ static void compile_clip_prog( struct brw_context *brw, */ static void upload_clip_prog(struct brw_context *brw) { - GLcontext *ctx = &brw->intel.ctx; struct brw_clip_prog_key key; memset(&key, 0, sizeof(key)); @@ -151,101 +150,51 @@ static void upload_clip_prog(struct brw_context *brw) /* Populate the key: */ /* BRW_NEW_REDUCED_PRIMITIVE */ - key.primitive = brw->intel.reduced_primitive; + key.primitive = brw->reduced_primitive; /* CACHE_NEW_VS_PROG */ key.attrs = brw->vs.prog_data->outputs_written; - /* _NEW_LIGHT */ - key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); - /* _NEW_TRANSFORM */ - key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); + /* PIPE_NEW_RAST */ + key.do_flat_shading = brw->rast.base.flatshade; + /* PIPE_NEW_UCP */ + key.nr_userclip = brw->nr_ucp; if (BRW_IS_IGDNG(brw)) key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP; else key.clip_mode = BRW_CLIPMODE_NORMAL; - /* _NEW_POLYGON */ - if (key.primitive == GL_TRIANGLES) { - if (ctx->Polygon.CullFlag && - ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) + /* PIPE_NEW_RAST */ + if (key.primitive == PIPE_PRIM_TRIANGLES) { + if (brw->rast->cull_mode = PIPE_WINDING_BOTH) key.clip_mode = BRW_CLIPMODE_REJECT_ALL; else { - GLuint fill_front = CLIP_CULL; - GLuint fill_back = CLIP_CULL; - GLuint offset_front = 0; - GLuint offset_back = 0; - - if (!ctx->Polygon.CullFlag || - ctx->Polygon.CullFaceMode != GL_FRONT) { - switch (ctx->Polygon.FrontMode) { - case GL_FILL: - fill_front = CLIP_FILL; - offset_front = 0; - break; - case GL_LINE: - fill_front = CLIP_LINE; - offset_front = ctx->Polygon.OffsetLine; - break; - case GL_POINT: - fill_front = CLIP_POINT; - offset_front = ctx->Polygon.OffsetPoint; - break; - } + key.fill_ccw = CLIP_CULL; + key.fill_cw = CLIP_CULL; + + if (!(brw->rast->cull_mode & PIPE_WINDING_CCW)) { + key.fill_ccw = translate_fill(brw->rast.fill_ccw); } - if (!ctx->Polygon.CullFlag || - ctx->Polygon.CullFaceMode != GL_BACK) { - switch (ctx->Polygon.BackMode) { - case GL_FILL: - fill_back = CLIP_FILL; - offset_back = 0; - break; - case GL_LINE: - fill_back = CLIP_LINE; - offset_back = ctx->Polygon.OffsetLine; - break; - case GL_POINT: - fill_back = CLIP_POINT; - offset_back = ctx->Polygon.OffsetPoint; - break; - } + if (!(brw->rast->cull_mode & PIPE_WINDING_CW)) { + key.fill_cw = translate_fill(brw->rast.fill_cw); } - if (ctx->Polygon.BackMode != GL_FILL || - ctx->Polygon.FrontMode != GL_FILL) { + if (key.fill_cw != CLIP_FILL || + key.fill_ccw != CLIP_FILL) { key.do_unfilled = 1; - - /* Most cases the fixed function units will handle. Cases where - * one or more polygon faces are unfilled will require help: - */ key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; + } + + key.offset_ccw = brw->rast.offset_ccw; + key.offset_cw = brw->rast.offset_cw; + + if (brw->rast.light_twoside && + key.fill_cw != CLIP_CULL) + key.copy_bfc_cw = 1; - if (offset_back || offset_front) { - /* _NEW_POLYGON, _NEW_BUFFERS */ - key.offset_units = ctx->Polygon.OffsetUnits * brw->intel.polygon_offset_scale; - key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD; - } - - switch (ctx->Polygon.FrontFace) { - case GL_CCW: - key.fill_ccw = fill_front; - key.fill_cw = fill_back; - key.offset_ccw = offset_front; - key.offset_cw = offset_back; - if (ctx->Light.Model.TwoSide && - key.fill_cw != CLIP_CULL) - key.copy_bfc_cw = 1; - break; - case GL_CW: - key.fill_cw = fill_front; - key.fill_ccw = fill_back; - key.offset_cw = offset_front; - key.offset_ccw = offset_back; - if (ctx->Light.Model.TwoSide && - key.fill_ccw != CLIP_CULL) - key.copy_bfc_ccw = 1; - break; - } + if (brw->rast.light_twoside && + key.fill_ccw != CLIP_CULL) + key.copy_bfc_ccw = 1; } } } @@ -262,10 +211,8 @@ static void upload_clip_prog(struct brw_context *brw) const struct brw_tracked_state brw_clip_prog = { .dirty = { - .mesa = (_NEW_LIGHT | - _NEW_TRANSFORM | - _NEW_POLYGON | - _NEW_BUFFERS), + .mesa = (PIPE_NEW_RAST | + PIPE_NEW_UCP), .brw = (BRW_NEW_REDUCED_PRIMITIVE), .cache = CACHE_NEW_VS_PROG }, diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h index 957df44..d80ec81 100644 --- a/src/gallium/drivers/i965/brw_clip.h +++ b/src/gallium/drivers/i965/brw_clip.h @@ -43,6 +43,7 @@ */ struct brw_clip_prog_key { GLuint attrs:32; + GLuint primitive:4; GLuint nr_userclip:3; GLuint do_flat_shading:1; @@ -51,12 +52,10 @@ struct brw_clip_prog_key { GLuint fill_ccw:2; /* includes cull information */ GLuint offset_cw:1; GLuint offset_ccw:1; - GLuint pad0:17; - GLuint copy_bfc_cw:1; GLuint copy_bfc_ccw:1; GLuint clip_mode:3; - GLuint pad1:27; + GLuint pad1:12; GLfloat offset_factor; GLfloat offset_units; diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c index 048ca62..6b4da25 100644 --- a/src/gallium/drivers/i965/brw_clip_line.c +++ b/src/gallium/drivers/i965/brw_clip_line.c @@ -29,13 +29,6 @@ * Keith Whitwell <keith@tungstengraphics.com> */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" - -#include "intel_batchbuffer.h" - #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" diff --git a/src/gallium/drivers/i965/brw_clip_point.c b/src/gallium/drivers/i965/brw_clip_point.c index 8458f61..b2cf7b2 100644 --- a/src/gallium/drivers/i965/brw_clip_point.c +++ b/src/gallium/drivers/i965/brw_clip_point.c @@ -29,13 +29,6 @@ * Keith Whitwell <keith@tungstengraphics.com> */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" - -#include "intel_batchbuffer.h" - #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c index 234b374..72e2720 100644 --- a/src/gallium/drivers/i965/brw_clip_state.c +++ b/src/gallium/drivers/i965/brw_clip_state.c @@ -32,7 +32,6 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "main/macros.h" struct brw_clip_unit_key { unsigned int total_grf; @@ -66,8 +65,8 @@ clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key) key->nr_urb_entries = brw->urb.nr_clip_entries; key->urb_size = brw->urb.vsize; - /* _NEW_TRANSOFORM */ - key->depth_clamp = ctx->Transform.DepthClamp; + /* */ + key->depth_clamp = 0; // XXX: add this to gallium: ctx->Transform.DepthClamp; } static dri_bo * @@ -175,7 +174,7 @@ static void upload_clip_unit( struct brw_context *brw ) const struct brw_tracked_state brw_clip_unit = { .dirty = { - .mesa = _NEW_TRANSFORM, + .mesa = 0, .brw = (BRW_NEW_CURBE_OFFSETS | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_CLIP_PROG diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c index 0efd772..d8feca6 100644 --- a/src/gallium/drivers/i965/brw_clip_tri.c +++ b/src/gallium/drivers/i965/brw_clip_tri.c @@ -29,13 +29,6 @@ * Keith Whitwell <keith@tungstengraphics.com> */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" - -#include "intel_batchbuffer.h" - #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c index ad1bfa4..4baff55 100644 --- a/src/gallium/drivers/i965/brw_clip_unfilled.c +++ b/src/gallium/drivers/i965/brw_clip_unfilled.c @@ -29,11 +29,6 @@ * Keith Whitwell <keith@tungstengraphics.com> */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" - #include "intel_batchbuffer.h" #include "brw_defines.h" diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c index 5a73abd..7a6c46c 100644 --- a/src/gallium/drivers/i965/brw_clip_util.c +++ b/src/gallium/drivers/i965/brw_clip_util.c @@ -30,13 +30,6 @@ */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" - -#include "intel_batchbuffer.h" - #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index c300c33..bf0ec89 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -52,122 +52,77 @@ #include "utils.h" -/*************************************** - * Mesa's Driver Functions - ***************************************/ - -static void brwUseProgram(GLcontext *ctx, GLuint program) -{ - _mesa_use_program(ctx, program); -} - -static void brwInitProgFuncs( struct dd_function_table *functions ) -{ - functions->UseProgram = brwUseProgram; -} -static void brwInitDriverFunctions( struct dd_function_table *functions ) -{ - intelInitDriverFunctions( functions ); - - brwInitFragProgFuncs( functions ); - brwInitProgFuncs( functions ); - brw_init_queryobj_functions(functions); - - functions->Viewport = intel_viewport; -} GLboolean brwCreateContext( const __GLcontextModes *mesaVis, __DRIcontextPrivate *driContextPriv, void *sharedContextPrivate) { - struct dd_function_table functions; struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context); - struct intel_context *intel = &brw->intel; - GLcontext *ctx = &intel->ctx; if (!brw) { - _mesa_printf("%s: failed to alloc context\n", __FUNCTION__); - return GL_FALSE; - } - - brwInitVtbl( brw ); - brwInitDriverFunctions( &functions ); - - if (!intelInitContext( intel, mesaVis, driContextPriv, - sharedContextPrivate, &functions )) { - _mesa_printf("%s: failed to init intel context\n", __FUNCTION__); - FREE(brw); + debug_printf("%s: failed to alloc context\n", __FUNCTION__); return GL_FALSE; } - /* Initialize swrast, tnl driver tables: */ - intelInitSpanFuncs(ctx); - - TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; - - ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT; - ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ - ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits, - ctx->Const.MaxTextureImageUnits); - ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */ - - /* Mesa limits textures to 4kx4k; it would be nice to fix that someday - */ - ctx->Const.MaxTextureLevels = 13; - ctx->Const.Max3DTextureLevels = 9; - ctx->Const.MaxCubeTextureLevels = 12; - ctx->Const.MaxTextureRectSize = (1<<12); - - ctx->Const.MaxTextureMaxAnisotropy = 16.0; - - /* if conformance mode is set, swrast can handle any size AA point */ - ctx->Const.MaxPointSizeAA = 255.0; - /* We want the GLSL compiler to emit code that uses condition codes */ ctx->Shader.EmitCondCodes = GL_TRUE; ctx->Shader.EmitNVTempInitialization = GL_TRUE; - ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024); - ctx->Const.VertexProgram.MaxAluInstructions = 0; - ctx->Const.VertexProgram.MaxTexInstructions = 0; - ctx->Const.VertexProgram.MaxTexIndirections = 0; - ctx->Const.VertexProgram.MaxNativeAluInstructions = 0; - ctx->Const.VertexProgram.MaxNativeTexInstructions = 0; - ctx->Const.VertexProgram.MaxNativeTexIndirections = 0; - ctx->Const.VertexProgram.MaxNativeAttribs = 16; - ctx->Const.VertexProgram.MaxNativeTemps = 256; - ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; - ctx->Const.VertexProgram.MaxNativeParameters = 1024; - ctx->Const.VertexProgram.MaxEnvParams = - MIN2(ctx->Const.VertexProgram.MaxNativeParameters, - ctx->Const.VertexProgram.MaxEnvParams); - - ctx->Const.FragmentProgram.MaxNativeInstructions = (16 * 1024); - ctx->Const.FragmentProgram.MaxNativeAluInstructions = (16 * 1024); - ctx->Const.FragmentProgram.MaxNativeTexInstructions = (16 * 1024); - ctx->Const.FragmentProgram.MaxNativeTexIndirections = (16 * 1024); - ctx->Const.FragmentProgram.MaxNativeAttribs = 12; - ctx->Const.FragmentProgram.MaxNativeTemps = 256; - ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; - ctx->Const.FragmentProgram.MaxNativeParameters = 1024; - ctx->Const.FragmentProgram.MaxEnvParams = - MIN2(ctx->Const.FragmentProgram.MaxNativeParameters, - ctx->Const.FragmentProgram.MaxEnvParams); + brw_init_query( brw ); brw_init_state( brw ); + brw_draw_init( brw ); brw->state.dirty.mesa = ~0; brw->state.dirty.brw = ~0; brw->emit_state_always = 0; - ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; - ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; - make_empty_list(&brw->query.active_head); - brw_draw_init( brw ); return GL_TRUE; } +/** + * called from intelDestroyContext() + */ +static void brw_destroy_context( struct intel_context *intel ) +{ + struct brw_context *brw = brw_context(&intel->ctx); + int i; + + brw_destroy_state(brw); + brw_draw_destroy( brw ); + + _mesa_free(brw->wm.compile_data); + + for (i = 0; i < brw->state.nr_color_regions; i++) + intel_region_release(&brw->state.color_regions[i]); + brw->state.nr_color_regions = 0; + intel_region_release(&brw->state.depth_region); + + dri_bo_unreference(brw->curbe.curbe_bo); + dri_bo_unreference(brw->vs.prog_bo); + dri_bo_unreference(brw->vs.state_bo); + dri_bo_unreference(brw->vs.bind_bo); + dri_bo_unreference(brw->gs.prog_bo); + dri_bo_unreference(brw->gs.state_bo); + dri_bo_unreference(brw->clip.prog_bo); + dri_bo_unreference(brw->clip.state_bo); + dri_bo_unreference(brw->clip.vp_bo); + dri_bo_unreference(brw->sf.prog_bo); + dri_bo_unreference(brw->sf.state_bo); + dri_bo_unreference(brw->sf.vp_bo); + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) + dri_bo_unreference(brw->wm.sdc_bo[i]); + dri_bo_unreference(brw->wm.bind_bo); + for (i = 0; i < BRW_WM_MAX_SURF; i++) + dri_bo_unreference(brw->wm.surf_bo[i]); + dri_bo_unreference(brw->wm.sampler_bo); + dri_bo_unreference(brw->wm.prog_bo); + dri_bo_unreference(brw->wm.state_bo); + dri_bo_unreference(brw->cc.prog_bo); + dri_bo_unreference(brw->cc.state_bo); + dri_bo_unreference(brw->cc.vp_bo); +} diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index fa3e32c..009e28b 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -115,7 +115,6 @@ * Handles blending and (presumably) depth and stencil testing. */ -#define BRW_FALLBACK_TEXTURE 0x1 #define BRW_MAX_CURBE (32*16) struct brw_context; @@ -450,11 +449,9 @@ struct brw_query_object { */ struct brw_context { - struct intel_context intel; /**< base class, must be first field */ GLuint primitive; GLboolean emit_state_always; - GLboolean tmp_fallback; GLboolean no_batch_wrap; struct { @@ -692,7 +689,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, /*====================================================================== * brw_queryobj.c */ -void brw_init_queryobj_functions(struct dd_function_table *functions); +void brw_init_query(struct brw_context *brw); void brw_prepare_query_begin(struct brw_context *brw); void brw_emit_query_begin(struct brw_context *brw); void brw_emit_query_end(struct brw_context *brw); @@ -730,7 +727,7 @@ int brw_disasm (FILE *file, struct brw_instruction *inst); * macros used previously: */ static INLINE struct brw_context * -brw_context( GLcontext *ctx ) +brw_context( struct pipe_context *ctx ) { return (struct brw_context *)ctx; } diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index 4be6c77..3e32c49 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -30,14 +30,6 @@ */ - -#include "main/glheader.h" -#include "main/context.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/prog_parameter.h" -#include "shader/prog_print.h" -#include "shader/prog_statevars.h" #include "intel_batchbuffer.h" #include "intel_regions.h" #include "brw_context.h" @@ -64,31 +56,17 @@ static void calculate_curbe_offsets( struct brw_context *brw ) GLuint nr_clip_regs = 0; GLuint total_regs; - /* _NEW_TRANSFORM */ - if (ctx->Transform.ClipPlanesEnabled) { - GLuint nr_planes = 6 + brw_count_bits(ctx->Transform.ClipPlanesEnabled); + /* PIPE_NEW_UCP */ + if (brw->nr_ucp) { + GLuint nr_planes = 6 + brw->nr_ucp; nr_clip_regs = (nr_planes * 4 + 15) / 16; } total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs; - /* This can happen - what to do? Probably rather than falling - * back, the best thing to do is emit programs which code the - * constants as immediate values. Could do this either as a static - * cap on WM and VS, or adaptively. - * - * Unfortunately, this is currently dependent on the results of the - * program generation process (in the case of wm), so this would - * introduce the need to re-generate programs in the event of a - * curbe allocation failure. - */ - /* Max size is 32 - just large enough to - * hold the 128 parameters allowed by - * the fragment and vertex program - * api's. It's not clear what happens - * when both VP and FP want to use 128 - * parameters, though. + /* When this is > 32, want to use a true constant buffer to hold + * the extra constants. */ assert(total_regs <= 32); @@ -113,8 +91,8 @@ static void calculate_curbe_offsets( struct brw_context *brw ) brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs; brw->curbe.total_size = reg; - if (0) - _mesa_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n", + if (BRW_DEBUG & DEBUG_CURBE) + debug_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n", brw->curbe.wm_start, brw->curbe.wm_size, brw->curbe.clip_start, @@ -129,7 +107,7 @@ static void calculate_curbe_offsets( struct brw_context *brw ) const struct brw_tracked_state brw_curbe_offsets = { .dirty = { - .mesa = _NEW_TRANSFORM, + .mesa = PIPE_NEW_UCP, .brw = BRW_NEW_VERTEX_PROGRAM, .cache = CACHE_NEW_WM_PROG }, @@ -204,11 +182,13 @@ static void prepare_constant_buffer(struct brw_context *brw) if (brw->curbe.wm_size) { GLuint offset = brw->curbe.wm_start * 16; - _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); + /* map fs constant buffer */ /* copy float constants */ for (i = 0; i < brw->wm.prog_data->nr_params; i++) buf[offset + i] = *brw->wm.prog_data->param[i]; + + /* unmap fs constant buffer */ } @@ -228,18 +208,15 @@ static void prepare_constant_buffer(struct brw_context *brw) buf[offset + i * 4 + 3] = fixed_plane[i][3]; } - /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to - * clip-space: + /* Clip planes: */ - assert(MAX_CLIP_PLANES == 6); - for (j = 0; j < MAX_CLIP_PLANES; j++) { - if (ctx->Transform.ClipPlanesEnabled & (1<<j)) { - buf[offset + i * 4 + 0] = ctx->Transform._ClipUserPlane[j][0]; - buf[offset + i * 4 + 1] = ctx->Transform._ClipUserPlane[j][1]; - buf[offset + i * 4 + 2] = ctx->Transform._ClipUserPlane[j][2]; - buf[offset + i * 4 + 3] = ctx->Transform._ClipUserPlane[j][3]; - i++; - } + assert(brw->nr_ucp <= 6); + for (j = 0; j < brw->nr_ucp; j++) { + buf[offset + i * 4 + 0] = brw->ucp[j][0]; + buf[offset + i * 4 + 1] = brw->ucp[j][1]; + buf[offset + i * 4 + 2] = brw->ucp[j][2]; + buf[offset + i * 4 + 3] = brw->ucp[j][3]; + i++; } } @@ -248,13 +225,7 @@ static void prepare_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->vs.prog_data->nr_params / 4; - if (brw->vertex_program->IsNVProgram) - _mesa_load_tracked_matrices(ctx); - - /* Updates the ParamaterValues[i] pointers for all parameters of the - * basic type of PROGRAM_STATE_VAR. - */ - _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); + /* map vs constant buffer */ /* XXX just use a memcpy here */ for (i = 0; i < nr; i++) { @@ -264,14 +235,16 @@ static void prepare_constant_buffer(struct brw_context *brw) buf[offset + i * 4 + 2] = value[2]; buf[offset + i * 4 + 3] = value[3]; } + + /* unmap vs constant buffer */ } if (0) { for (i = 0; i < sz*16; i+=4) - _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, + debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, buf[i+0], buf[i+1], buf[i+2], buf[i+3]); - _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n", + debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n", brw->curbe.last_buf, buf, bufsz, brw->curbe.last_bufsz, brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1); @@ -282,12 +255,12 @@ static void prepare_constant_buffer(struct brw_context *brw) bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { /* constants have not changed */ - _mesa_free(buf); + FREE(buf); } else { /* constants have changed */ if (brw->curbe.last_buf) - _mesa_free(brw->curbe.last_buf); + FREE(brw->curbe.last_buf); brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; @@ -353,15 +326,11 @@ static void emit_constant_buffer(struct brw_context *brw) ADVANCE_BATCH(); } -/* This tracked state is unique in that the state it monitors varies - * dynamically depending on the parameters tracked by the fragment and - * vertex programs. This is the template used as a starting point, - * each context will maintain a copy of this internally and update as - * required. - */ const struct brw_tracked_state brw_constant_buffer = { .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS, + .mesa = (PIPE_NEW_FS_CONSTANTS | + PIPE_NEW_VS_CONSTANTS | + PIPE_NEW_UCP), .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_VERTEX_PROGRAM | BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h index 78d457a..282c5b1 100644 --- a/src/gallium/drivers/i965/brw_defines.h +++ b/src/gallium/drivers/i965/brw_defines.h @@ -840,8 +840,8 @@ #include "intel_chipset.h" -#define BRW_IS_G4X(brw) (IS_G4X((brw)->intel.intelScreen->deviceID)) -#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->intel.intelScreen->deviceID)) +#define BRW_IS_G4X(brw) (IS_G4X((brw)->brw_screen->deviceID)) +#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->brw_screen->deviceID)) #define BRW_IS_965(brw) (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) #define CMD_PIPELINE_SELECT(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965) #define CMD_VF_STATISTICS(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965) diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c index 9fef230..a84c581 100644 --- a/src/gallium/drivers/i965/brw_disasm.c +++ b/src/gallium/drivers/i965/brw_disasm.c @@ -27,8 +27,6 @@ #include <unistd.h> #include <stdarg.h> -#include "main/mtypes.h" - #include "brw_context.h" #include "brw_defines.h" diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index 44bb7bd..8cd117c 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -39,14 +39,13 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_state.h" -#include "brw_fallback.h" #include "intel_batchbuffer.h" #include "intel_buffer_objects.h" #define FILE_DEBUG_FLAG DEBUG_BATCH -static GLuint prim_to_hw_prim[GL_POLYGON+1] = { +static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = { _3DPRIM_POINTLIST, _3DPRIM_LINELIST, _3DPRIM_LINELOOP, @@ -60,19 +59,6 @@ static GLuint prim_to_hw_prim[GL_POLYGON+1] = { }; -static const GLenum reduced_prim[GL_POLYGON+1] = { - GL_POINTS, - GL_LINES, - GL_LINES, - GL_LINES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES -}; - /* When the primitive changes, set a state bit and re-validate. Not * the nicest and would rather deal with this by having all the @@ -196,102 +182,6 @@ static void brw_merge_inputs( struct brw_context *brw, brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS; } -/* XXX: could split the primitive list to fallback only on the - * non-conformant primitives. - */ -static GLboolean check_fallbacks( struct brw_context *brw, - const struct _mesa_prim *prim, - GLuint nr_prims ) -{ - GLcontext *ctx = &brw->intel.ctx; - GLuint i; - - /* If we don't require strict OpenGL conformance, never - * use fallbacks. If we're forcing fallbacks, always - * use fallfacks. - */ - if (brw->intel.conformance_mode == 0) - return GL_FALSE; - - if (brw->intel.conformance_mode == 2) - return GL_TRUE; - - if (ctx->Polygon.SmoothFlag) { - for (i = 0; i < nr_prims; i++) - if (reduced_prim[prim[i].mode] == GL_TRIANGLES) - return GL_TRUE; - } - - /* BRW hardware will do AA lines, but they are non-conformant it - * seems. TBD whether we keep this fallback: - */ - if (ctx->Line.SmoothFlag) { - for (i = 0; i < nr_prims; i++) - if (reduced_prim[prim[i].mode] == GL_LINES) - return GL_TRUE; - } - - /* Stipple -- these fallbacks could be resolved with a little - * bit of work? - */ - if (ctx->Line.StippleFlag) { - for (i = 0; i < nr_prims; i++) { - /* GS doesn't get enough information to know when to reset - * the stipple counter?!? - */ - if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP) - return GL_TRUE; - - if (prim[i].mode == GL_POLYGON && - (ctx->Polygon.FrontMode == GL_LINE || - ctx->Polygon.BackMode == GL_LINE)) - return GL_TRUE; - } - } - - if (ctx->Point.SmoothFlag) { - for (i = 0; i < nr_prims; i++) - if (prim[i].mode == GL_POINTS) - return GL_TRUE; - } - - /* BRW hardware doesn't handle GL_CLAMP texturing correctly; - * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP - * as GL_CLAMP_TO_EDGE instead. If we're using GL_CLAMP, and - * we want strict conformance, force the fallback. - * Right now, we only do this for 2D textures. - */ - { - int u; - for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) { - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u]; - if (texUnit->Enabled) { - if (texUnit->Enabled & TEXTURE_1D_BIT) { - if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) { - return GL_TRUE; - } - } - if (texUnit->Enabled & TEXTURE_2D_BIT) { - if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP || - texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) { - return GL_TRUE; - } - } - if (texUnit->Enabled & TEXTURE_3D_BIT) { - if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP || - texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP || - texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) { - return GL_TRUE; - } - } - } - } - } - - /* Nothing stopping us from the fast path now */ - return GL_FALSE; -} - /* May fail if out of video memory for texture or vbo upload, or on * fallback conditions. */ @@ -308,23 +198,12 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, GLboolean retval = GL_FALSE; GLboolean warn = GL_FALSE; GLboolean first_time = GL_TRUE; + uint32_t hw_prim; GLuint i; if (ctx->NewState) _mesa_update_state( ctx ); - /* We have to validate the textures *before* checking for fallbacks; - * otherwise, the software fallback won't be able to rely on the - * texture state, the firstLevel and lastLevel fields won't be - * set in the intel texture object (they'll both be 0), and the - * software fallback will segfault if it attempts to access any - * texture level other than level 0. - */ - brw_validate_textures( brw ); - - if (check_fallbacks(brw, prim, nr_prims)) - return GL_FALSE; - /* Bind all inputs, derive varying and size information: */ brw_merge_inputs( brw, arrays ); @@ -336,90 +215,30 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, brw->vb.max_index = max_index; brw->state.dirty.brw |= BRW_NEW_VERTICES; - /* Have to validate state quite late. Will rebuild tnl_program, - * which depends on varying information. - * - * Note this is where brw->vs->prog_data.inputs_read is calculated, - * so can't access it earlier. - */ - - LOCK_HARDWARE(intel); - - if (!intel->constant_cliprect && intel->driDrawable->numClipRects == 0) { - UNLOCK_HARDWARE(intel); - return GL_TRUE; - } - - for (i = 0; i < nr_prims; i++) { - uint32_t hw_prim; - - /* Flush the batch if it's approaching full, so that we don't wrap while - * we've got validated state that needs to be in the same batch as the - * primitives. This fraction is just a guess (minimal full state plus - * a primitive is around 512 bytes), and would be better if we had - * an upper bound of how much we might emit in a single - * brw_try_draw_prims(). - */ - intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4, - LOOP_CLIPRECTS); - - hw_prim = brw_set_prim(brw, prim[i].mode); - - if (first_time || (brw->state.dirty.brw & BRW_NEW_PRIMITIVE)) { - first_time = GL_FALSE; - - brw_validate_state(brw); - - /* Various fallback checks: */ - if (brw->intel.Fallback) - goto out; - - /* Check that we can fit our state in with our existing batchbuffer, or - * flush otherwise. - */ - if (dri_bufmgr_check_aperture_space(brw->state.validated_bos, - brw->state.validated_bo_count)) { - static GLboolean warned; - intel_batchbuffer_flush(intel->batch); - - /* Validate the state after we flushed the batch (which would have - * changed the set of dirty state). If we still fail to - * check_aperture, warn of what's happening, but attempt to continue - * on since it may succeed anyway, and the user would probably rather - * see a failure and a warning than a fallback. - */ - brw_validate_state(brw); - if (!warned && - dri_bufmgr_check_aperture_space(brw->state.validated_bos, - brw->state.validated_bo_count)) { - warn = GL_TRUE; - warned = GL_TRUE; - } - } - - brw_upload_state(brw); - } + hw_prim = brw_set_prim(brw, prim[i].mode); - brw_emit_prim(brw, &prim[i], hw_prim); + brw_validate_state(brw); - retval = GL_TRUE; - } + /* Check that we can fit our state in with our existing batchbuffer, or + * flush otherwise. + */ + ret = dri_bufmgr_check_aperture_space(brw->state.validated_bos, + brw->state.validated_bo_count); + if (ret) + return ret; + + ret = brw_upload_state(brw); + if (ret) + return ret; + + ret = brw_emit_prim(brw, &prim[i], hw_prim); + if (ret) + return ret; if (intel->always_flush_batch) intel_batchbuffer_flush(intel->batch); - out: - UNLOCK_HARDWARE(intel); - - brw_state_cache_check_size(brw); - - if (warn) - fprintf(stderr, "i965: Single primitive emit potentially exceeded " - "available aperture space\n"); - if (!retval) - DBG("%s failed\n", __FUNCTION__); - - return retval; + return 0; } void brw_draw_prims( GLcontext *ctx, @@ -431,37 +250,26 @@ void brw_draw_prims( GLcontext *ctx, GLuint min_index, GLuint max_index ) { - GLboolean retval; + enum pipe_error ret; if (!vbo_all_varyings_in_vbos(arrays)) { if (!index_bounds_valid) vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); - - /* Decide if we want to rebase. If so we end up recursing once - * only into this function. - */ - if (min_index != 0) { - vbo_rebase_prims(ctx, arrays, - prim, nr_prims, - ib, min_index, max_index, - brw_draw_prims ); - return; - } } /* Make a first attempt at drawing: */ - retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); /* Otherwise, we really are out of memory. Pass the drawing * command to the software tnl module and which will in turn call * swrast to do the drawing. */ - if (!retval) { - _swsetup_Wakeup(ctx); - _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + if (ret != 0) { + intel_batchbuffer_flush(intel->batch); + ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + assert(ret == 0); } - } void brw_draw_init( struct brw_context *brw ) diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index a3ff6c5..ad3ef6b 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -25,13 +25,9 @@ * **************************************************************************/ +#include "pipe/p_context.h" -#include "main/glheader.h" -#include "main/bufferobj.h" -#include "main/context.h" -#include "main/state.h" -#include "main/api_validate.h" -#include "main/enums.h" +#include "util/u_upload_mgr.h" #include "brw_draw.h" #include "brw_defines.h" @@ -43,303 +39,157 @@ #include "intel_buffer_objects.h" #include "intel_tex.h" -static GLuint double_types[5] = { - 0, - BRW_SURFACEFORMAT_R64_FLOAT, - BRW_SURFACEFORMAT_R64G64_FLOAT, - BRW_SURFACEFORMAT_R64G64B64_FLOAT, - BRW_SURFACEFORMAT_R64G64B64A64_FLOAT -}; - -static GLuint float_types[5] = { - 0, - BRW_SURFACEFORMAT_R32_FLOAT, - BRW_SURFACEFORMAT_R32G32_FLOAT, - BRW_SURFACEFORMAT_R32G32B32_FLOAT, - BRW_SURFACEFORMAT_R32G32B32A32_FLOAT -}; - -static GLuint uint_types_norm[5] = { - 0, - BRW_SURFACEFORMAT_R32_UNORM, - BRW_SURFACEFORMAT_R32G32_UNORM, - BRW_SURFACEFORMAT_R32G32B32_UNORM, - BRW_SURFACEFORMAT_R32G32B32A32_UNORM -}; - -static GLuint uint_types_scale[5] = { - 0, - BRW_SURFACEFORMAT_R32_USCALED, - BRW_SURFACEFORMAT_R32G32_USCALED, - BRW_SURFACEFORMAT_R32G32B32_USCALED, - BRW_SURFACEFORMAT_R32G32B32A32_USCALED -}; - -static GLuint int_types_norm[5] = { - 0, - BRW_SURFACEFORMAT_R32_SNORM, - BRW_SURFACEFORMAT_R32G32_SNORM, - BRW_SURFACEFORMAT_R32G32B32_SNORM, - BRW_SURFACEFORMAT_R32G32B32A32_SNORM -}; - -static GLuint int_types_scale[5] = { - 0, - BRW_SURFACEFORMAT_R32_SSCALED, - BRW_SURFACEFORMAT_R32G32_SSCALED, - BRW_SURFACEFORMAT_R32G32B32_SSCALED, - BRW_SURFACEFORMAT_R32G32B32A32_SSCALED -}; - -static GLuint ushort_types_norm[5] = { - 0, - BRW_SURFACEFORMAT_R16_UNORM, - BRW_SURFACEFORMAT_R16G16_UNORM, - BRW_SURFACEFORMAT_R16G16B16_UNORM, - BRW_SURFACEFORMAT_R16G16B16A16_UNORM -}; - -static GLuint ushort_types_scale[5] = { - 0, - BRW_SURFACEFORMAT_R16_USCALED, - BRW_SURFACEFORMAT_R16G16_USCALED, - BRW_SURFACEFORMAT_R16G16B16_USCALED, - BRW_SURFACEFORMAT_R16G16B16A16_USCALED -}; - -static GLuint short_types_norm[5] = { - 0, - BRW_SURFACEFORMAT_R16_SNORM, - BRW_SURFACEFORMAT_R16G16_SNORM, - BRW_SURFACEFORMAT_R16G16B16_SNORM, - BRW_SURFACEFORMAT_R16G16B16A16_SNORM -}; - -static GLuint short_types_scale[5] = { - 0, - BRW_SURFACEFORMAT_R16_SSCALED, - BRW_SURFACEFORMAT_R16G16_SSCALED, - BRW_SURFACEFORMAT_R16G16B16_SSCALED, - BRW_SURFACEFORMAT_R16G16B16A16_SSCALED -}; -static GLuint ubyte_types_norm[5] = { - 0, - BRW_SURFACEFORMAT_R8_UNORM, - BRW_SURFACEFORMAT_R8G8_UNORM, - BRW_SURFACEFORMAT_R8G8B8_UNORM, - BRW_SURFACEFORMAT_R8G8B8A8_UNORM -}; -static GLuint ubyte_types_scale[5] = { - 0, - BRW_SURFACEFORMAT_R8_USCALED, - BRW_SURFACEFORMAT_R8G8_USCALED, - BRW_SURFACEFORMAT_R8G8B8_USCALED, - BRW_SURFACEFORMAT_R8G8B8A8_USCALED -}; - -static GLuint byte_types_norm[5] = { - 0, - BRW_SURFACEFORMAT_R8_SNORM, - BRW_SURFACEFORMAT_R8G8_SNORM, - BRW_SURFACEFORMAT_R8G8B8_SNORM, - BRW_SURFACEFORMAT_R8G8B8A8_SNORM -}; -static GLuint byte_types_scale[5] = { - 0, - BRW_SURFACEFORMAT_R8_SSCALED, - BRW_SURFACEFORMAT_R8G8_SSCALED, - BRW_SURFACEFORMAT_R8G8B8_SSCALED, - BRW_SURFACEFORMAT_R8G8B8A8_SSCALED -}; - - -/** - * Given vertex array type/size/format/normalized info, return - * the appopriate hardware surface type. - * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays. - */ -static GLuint get_surface_type( GLenum type, GLuint size, - GLenum format, GLboolean normalized ) +unsigned brw_translate_surface_format( unsigned id ) { - if (INTEL_DEBUG & DEBUG_VERTS) - _mesa_printf("type %s size %d normalized %d\n", - _mesa_lookup_enum_by_nr(type), size, normalized); - - if (normalized) { - switch (type) { - case GL_DOUBLE: return double_types[size]; - case GL_FLOAT: return float_types[size]; - case GL_INT: return int_types_norm[size]; - case GL_SHORT: return short_types_norm[size]; - case GL_BYTE: return byte_types_norm[size]; - case GL_UNSIGNED_INT: return uint_types_norm[size]; - case GL_UNSIGNED_SHORT: return ushort_types_norm[size]; - case GL_UNSIGNED_BYTE: - if (format == GL_BGRA) { - /* See GL_EXT_vertex_array_bgra */ - assert(size == 4); - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - } - else { - return ubyte_types_norm[size]; - } - default: assert(0); return 0; - } - } - else { - assert(format == GL_RGBA); /* sanity check */ - switch (type) { - case GL_DOUBLE: return double_types[size]; - case GL_FLOAT: return float_types[size]; - case GL_INT: return int_types_scale[size]; - case GL_SHORT: return short_types_scale[size]; - case GL_BYTE: return byte_types_scale[size]; - case GL_UNSIGNED_INT: return uint_types_scale[size]; - case GL_UNSIGNED_SHORT: return ushort_types_scale[size]; - case GL_UNSIGNED_BYTE: return ubyte_types_scale[size]; - default: assert(0); return 0; - } + switch (id) { + case PIPE_FORMAT_R64_FLOAT: + return BRW_SURFACEFORMAT_R64_FLOAT; + case PIPE_FORMAT_R64G64_FLOAT: + return BRW_SURFACEFORMAT_R64G64_FLOAT; + case PIPE_FORMAT_R64G64B64_FLOAT: + return BRW_SURFACEFORMAT_R64G64B64_FLOAT; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT; + + case PIPE_FORMAT_R32_FLOAT: + return BRW_SURFACEFORMAT_R32_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return BRW_SURFACEFORMAT_R32G32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return BRW_SURFACEFORMAT_R32G32B32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + + case PIPE_FORMAT_R32_UNORM: + return BRW_SURFACEFORMAT_R32_UNORM; + case PIPE_FORMAT_R32G32_UNORM: + return BRW_SURFACEFORMAT_R32G32_UNORM; + case PIPE_FORMAT_R32G32B32_UNORM: + return BRW_SURFACEFORMAT_R32G32B32_UNORM; + case PIPE_FORMAT_R32G32B32A32_UNORM: + return BRW_SURFACEFORMAT_R32G32B32A32_UNORM; + + case PIPE_FORMAT_R32_USCALED: + return BRW_SURFACEFORMAT_R32_USCALED; + case PIPE_FORMAT_R32G32_USCALED: + return BRW_SURFACEFORMAT_R32G32_USCALED; + case PIPE_FORMAT_R32G32B32_USCALED: + return BRW_SURFACEFORMAT_R32G32B32_USCALED; + case PIPE_FORMAT_R32G32B32A32_USCALED: + return BRW_SURFACEFORMAT_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return BRW_SURFACEFORMAT_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return BRW_SURFACEFORMAT_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return BRW_SURFACEFORMAT_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return BRW_SURFACEFORMAT_R32G32B32A32_SNORM; + + case PIPE_FORMAT_R32_SSCALED: + return BRW_SURFACEFORMAT_R32_SSCALED; + case PIPE_FORMAT_R32G32_SSCALED: + return BRW_SURFACEFORMAT_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return BRW_SURFACEFORMAT_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return BRW_SURFACEFORMAT_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return BRW_SURFACEFORMAT_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return BRW_SURFACEFORMAT_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return BRW_SURFACEFORMAT_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return BRW_SURFACEFORMAT_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return BRW_SURFACEFORMAT_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return BRW_SURFACEFORMAT_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return BRW_SURFACEFORMAT_R16G16B16A16_USCALED; + + case PIPE_FORMAT_R16_SNORM: + return BRW_SURFACEFORMAT_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return BRW_SURFACEFORMAT_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return BRW_SURFACEFORMAT_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return BRW_SURFACEFORMAT_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return BRW_SURFACEFORMAT_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return BRW_SURFACEFORMAT_R16G16_SSCALED; + case PIPE_FORMAT_R16G16B16_SSCALED: + return BRW_SURFACEFORMAT_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return BRW_SURFACEFORMAT_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return BRW_SURFACEFORMAT_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + return BRW_SURFACEFORMAT_R8_USCALED; + case PIPE_FORMAT_R8G8_USCALED: + return BRW_SURFACEFORMAT_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return BRW_SURFACEFORMAT_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return BRW_SURFACEFORMAT_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return BRW_SURFACEFORMAT_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return BRW_SURFACEFORMAT_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return BRW_SURFACEFORMAT_R8_SSCALED; + case PIPE_FORMAT_R8G8_SSCALED: + return BRW_SURFACEFORMAT_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return BRW_SURFACEFORMAT_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED; + + default: + assert(0); + return 0; } } - -static GLuint get_size( GLenum type ) -{ - switch (type) { - case GL_DOUBLE: return sizeof(GLdouble); - case GL_FLOAT: return sizeof(GLfloat); - case GL_INT: return sizeof(GLint); - case GL_SHORT: return sizeof(GLshort); - case GL_BYTE: return sizeof(GLbyte); - case GL_UNSIGNED_INT: return sizeof(GLuint); - case GL_UNSIGNED_SHORT: return sizeof(GLushort); - case GL_UNSIGNED_BYTE: return sizeof(GLubyte); - default: return 0; - } -} - -static GLuint get_index_type(GLenum type) +static unsigned get_index_type(int type) { switch (type) { - case GL_UNSIGNED_BYTE: return BRW_INDEX_BYTE; - case GL_UNSIGNED_SHORT: return BRW_INDEX_WORD; - case GL_UNSIGNED_INT: return BRW_INDEX_DWORD; + case 1: return BRW_INDEX_BYTE; + case 2: return BRW_INDEX_WORD; + case 4: return BRW_INDEX_DWORD; default: assert(0); return 0; } } -static void wrap_buffers( struct brw_context *brw, - GLuint size ) -{ - if (size < BRW_UPLOAD_INIT_SIZE) - size = BRW_UPLOAD_INIT_SIZE; - - brw->vb.upload.offset = 0; - - if (brw->vb.upload.bo != NULL) - dri_bo_unreference(brw->vb.upload.bo); - brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO", - size, 1); - - /* Set the internal VBO\ to no-backing-store. We only use them as a - * temporary within a brw_try_draw_prims while the lock is held. - */ - /* DON'T DO THIS AS IF WE HAVE TO RE-ORG MEMORY WE NEED SOMEWHERE WITH - FAKE TO PUSH THIS STUFF */ -// if (!brw->intel.ttm) -// dri_bo_fake_disable_backing_store(brw->vb.upload.bo, NULL, NULL); -} - -static void get_space( struct brw_context *brw, - GLuint size, - dri_bo **bo_return, - GLuint *offset_return ) -{ - size = ALIGN(size, 64); - - if (brw->vb.upload.bo == NULL || - brw->vb.upload.offset + size > brw->vb.upload.bo->size) { - wrap_buffers(brw, size); - } - - assert(*bo_return == NULL); - dri_bo_reference(brw->vb.upload.bo); - *bo_return = brw->vb.upload.bo; - *offset_return = brw->vb.upload.offset; - brw->vb.upload.offset += size; -} - -static void -copy_array_to_vbo_array( struct brw_context *brw, - struct brw_vertex_element *element, - GLuint dst_stride) -{ - struct intel_context *intel = &brw->intel; - GLuint size = element->count * dst_stride; - - get_space(brw, size, &element->bo, &element->offset); - if (element->glarray->StrideB == 0) { - assert(element->count == 1); - element->stride = 0; - } else { - element->stride = dst_stride; - } - - if (dst_stride == element->glarray->StrideB) { - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(element->bo); - memcpy((char *)element->bo->virtual + element->offset, - element->glarray->Ptr, size); - drm_intel_gem_bo_unmap_gtt(element->bo); - } else { - dri_bo_subdata(element->bo, - element->offset, - size, - element->glarray->Ptr); - } - } else { - char *dest; - const unsigned char *src = element->glarray->Ptr; - int i; - - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(element->bo); - dest = element->bo->virtual; - dest += element->offset; - - for (i = 0; i < element->count; i++) { - memcpy(dest, src, dst_stride); - src += element->glarray->StrideB; - dest += dst_stride; - } - - drm_intel_gem_bo_unmap_gtt(element->bo); - } else { - void *data; - - data = _mesa_malloc(dst_stride * element->count); - dest = data; - for (i = 0; i < element->count; i++) { - memcpy(dest, src, dst_stride); - src += element->glarray->StrideB; - dest += dst_stride; - } - - dri_bo_subdata(element->bo, - element->offset, - size, - data); - - _mesa_free(data); - } - } -} -static void brw_prepare_vertices(struct brw_context *brw) +static boolean brw_prepare_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); @@ -358,123 +208,38 @@ static void brw_prepare_vertices(struct brw_context *brw) if (0) _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); - /* Accumulate the list of enabled arrays. */ - brw->vb.nr_enabled = 0; - while (vs_inputs) { - GLuint i = _mesa_ffsll(vs_inputs) - 1; - struct brw_vertex_element *input = &brw->vb.inputs[i]; - vs_inputs &= ~(1 << i); - brw->vb.enabled[brw->vb.nr_enabled++] = input; - } - - /* XXX: In the rare cases where this happens we fallback all - * the way to software rasterization, although a tnl fallback - * would be sufficient. I don't know of *any* real world - * cases with > 17 vertex attributes enabled, so it probably - * isn't an issue at this point. - */ - if (brw->vb.nr_enabled >= BRW_VEP_MAX) { - intel->Fallback = 1; - return; - } for (i = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; input->element_size = get_size(input->glarray->Type) * input->glarray->Size; - if (_mesa_is_bufferobj(input->glarray->BufferObj)) { - struct intel_buffer_object *intel_buffer = - intel_buffer_object(input->glarray->BufferObj); - - /* Named buffer object: Just reference its contents directly. */ - dri_bo_unreference(input->bo); - input->bo = intel_bufferobj_buffer(intel, intel_buffer, - INTEL_READ); - dri_bo_reference(input->bo); - input->offset = (unsigned long)input->glarray->Ptr; - input->stride = input->glarray->StrideB; - input->count = input->glarray->_MaxElement; - - /* This is a common place to reach if the user mistakenly supplies - * a pointer in place of a VBO offset. If we just let it go through, - * we may end up dereferencing a pointer beyond the bounds of the - * GTT. We would hope that the VBO's max_index would save us, but - * Mesa appears to hand us min/max values not clipped to the - * array object's _MaxElement, and _MaxElement frequently appears - * to be wrong anyway. - * - * The VBO spec allows application termination in this case, and it's - * probably a service to the poor programmer to do so rather than - * trying to just not render. - */ - assert(input->offset < input->bo->size); - } else { - input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1; - if (input->bo != NULL) { - /* Already-uploaded vertex data is present from a previous - * prepare_vertices, but we had to re-validate state due to - * check_aperture failing and a new batch being produced. - */ - continue; - } - - /* Queue the buffer object up to be uploaded in the next pass, - * when we've decided if we're doing interleaved or not. - */ - if (input->attrib == VERT_ATTRIB_POS) { - /* Position array not properly enabled: - */ - if (input->glarray->StrideB == 0) { - intel->Fallback = 1; - return; - } - - interleave = input->glarray->StrideB; - ptr = input->glarray->Ptr; - } - else if (interleave != input->glarray->StrideB || - (const unsigned char *)input->glarray->Ptr - ptr < 0 || - (const unsigned char *)input->glarray->Ptr - ptr > interleave) - { - interleave = 0; - } - - upload[nr_uploads++] = input; - - /* We rebase drawing to start at element zero only when - * varyings are not in vbos, which means we can end up - * uploading non-varying arrays (stride != 0) when min_index - * is zero. This doesn't matter as the amount to upload is - * the same for these arrays whether the draw call is rebased - * or not - we just have to upload the one element. - */ - assert(min_index == 0 || input->glarray->StrideB == 0); - } - } - - /* Handle any arrays to be uploaded. */ - if (nr_uploads > 1 && interleave && interleave <= 256) { - /* All uploads are interleaved, so upload the arrays together as - * interleaved. First, upload the contents and set up upload[0]. - */ - copy_array_to_vbo_array(brw, upload[0], interleave); - - for (i = 1; i < nr_uploads; i++) { - /* Then, just point upload[i] at upload[0]'s buffer. */ - upload[i]->stride = interleave; - upload[i]->offset = upload[0]->offset + - ((const unsigned char *)upload[i]->glarray->Ptr - ptr); - upload[i]->bo = upload[0]->bo; - dri_bo_reference(upload[i]->bo); + if (brw_is_user_buffer(vb)) { + u_upload_buffer( brw->upload, + min_index * vb->stride, + (max_index + 1 - min_index) * vb->stride, + &offset, + &buffer ); } - } - else { - /* Upload non-interleaved arrays */ - for (i = 0; i < nr_uploads; i++) { - copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size); + else + { + offset = 0; + buffer = vb->buffer; + count = stride == 0 ? 1 : max_index + 1 - min_index; } + + /* Named buffer object: Just reference its contents directly. */ + dri_bo_unreference(input->bo); + input->bo = intel_bufferobj_buffer(intel, intel_buffer, + INTEL_READ); + dri_bo_reference(input->bo); + + input->offset = (unsigned long)offset; + input->stride = vb->stride; + input->count = count; + + assert(input->offset < input->bo->size); } brw_prepare_query_begin(brw); @@ -632,13 +397,8 @@ static void brw_prepare_indices(struct brw_context *brw) /* Straight upload */ - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(bo); - memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); - drm_intel_gem_bo_unmap_gtt(bo); - } else { - dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); - } + brw_bo_subdata(bo, offset, ib_size, index_buffer->ptr); + } else { offset = (GLuint) (unsigned long) index_buffer->ptr; brw->ib.start_vertex_offset = 0; diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c index 48c2b9a..5ec0c58 100644 --- a/src/gallium/drivers/i965/brw_gs.c +++ b/src/gallium/drivers/i965/brw_gs.c @@ -58,7 +58,7 @@ static void compile_gs_prog( struct brw_context *brw, /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ - c.nr_attrs = brw_count_bits(c.key.attrs); + c.nr_attrs = util_count_bits(c.key.attrs); if (BRW_IS_IGDNG(brw)) c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c new file mode 100644 index 0000000..b351794 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_blend.c @@ -0,0 +1,41 @@ + + /* _NEW_COLOR */ + if (key->logic_op != GL_COPY) { + cc.cc2.logicop_enable = 1; + cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op); + } else if (key->color_blend) { + GLenum eqRGB = key->blend_eq_rgb; + GLenum eqA = key->blend_eq_a; + GLenum srcRGB = key->blend_src_rgb; + GLenum dstRGB = key->blend_dst_rgb; + GLenum srcA = key->blend_src_a; + GLenum dstA = key->blend_dst_a; + + if (eqRGB == GL_MIN || eqRGB == GL_MAX) { + srcRGB = dstRGB = GL_ONE; + } + + if (eqA == GL_MIN || eqA == GL_MAX) { + srcA = dstA = GL_ONE; + } + + cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); + cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); + cc.cc6.blend_function = brw_translate_blend_equation(eqRGB); + + cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); + cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); + cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA); + + cc.cc3.blend_enable = 1; + cc.cc3.ia_blend_enable = (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB); + } + + if (key->dither) { + cc.cc5.dither_enable = 1; + cc.cc6.y_dither_offset = 0; + cc.cc6.x_dither_offset = 0; + } + diff --git a/src/gallium/drivers/i965/brw_pipe_debug.c b/src/gallium/drivers/i965/brw_pipe_debug.c new file mode 100644 index 0000000..34d6d40 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_debug.c @@ -0,0 +1,2 @@ + if (INTEL_DEBUG & DEBUG_STATS) + cc.cc5.statistics_enable = 1; diff --git a/src/gallium/drivers/i965/brw_pipe_depth.c b/src/gallium/drivers/i965/brw_pipe_depth.c new file mode 100644 index 0000000..da29bc8 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_depth.c @@ -0,0 +1,52 @@ + /* _NEW_STENCIL */ + if (key->dsa.stencil[0].enable) { + cc.cc0.stencil_enable = 1; + cc.cc0.stencil_func = + intel_translate_compare_func(key->stencil_func[0]); + cc.cc0.stencil_fail_op = + intel_translate_stencil_op(key->stencil_fail_op[0]); + cc.cc0.stencil_pass_depth_fail_op = + intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]); + cc.cc0.stencil_pass_depth_pass_op = + intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]); + cc.cc1.stencil_ref = key->stencil_ref[0]; + cc.cc1.stencil_write_mask = key->stencil_write_mask[0]; + cc.cc1.stencil_test_mask = key->stencil_test_mask[0]; + + if (key->stencil_two_side) { + cc.cc0.bf_stencil_enable = 1; + cc.cc0.bf_stencil_func = + intel_translate_compare_func(key->stencil_func[1]); + cc.cc0.bf_stencil_fail_op = + intel_translate_stencil_op(key->stencil_fail_op[1]); + cc.cc0.bf_stencil_pass_depth_fail_op = + intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]); + cc.cc0.bf_stencil_pass_depth_pass_op = + intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]); + cc.cc1.bf_stencil_ref = key->stencil_ref[1]; + cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1]; + cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1]; + } + + /* Not really sure about this: + */ + if (key->stencil_write_mask[0] || + (key->stencil_two_side && key->stencil_write_mask[1])) + cc.cc0.stencil_write_enable = 1; + } + + + if (key->alpha_enabled) { + cc.cc3.alpha_test = 1; + cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func); + cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; + + UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref); + } + + /* _NEW_DEPTH */ + if (key->depth_test) { + cc.cc2.depth_test = 1; + cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func); + cc.cc2.depth_write_enable = key->depth_write; + } diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c new file mode 100644 index 0000000..d4ae332 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_fb.c @@ -0,0 +1,25 @@ + +/** + * called from intelDrawBuffer() + */ +static void brw_set_draw_region( struct intel_context *intel, + struct intel_region *color_regions[], + struct intel_region *depth_region, + GLuint num_color_regions) +{ + struct brw_context *brw = brw_context(&intel->ctx); + GLuint i; + + /* release old color/depth regions */ + if (brw->state.depth_region != depth_region) + brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER; + for (i = 0; i < brw->state.nr_color_regions; i++) + intel_region_release(&brw->state.color_regions[i]); + intel_region_release(&brw->state.depth_region); + + /* reference new color/depth regions */ + for (i = 0; i < num_color_regions; i++) + intel_region_reference(&brw->state.color_regions[i], color_regions[i]); + intel_region_reference(&brw->state.depth_region, depth_region); + brw->state.nr_color_regions = num_color_regions; +} diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c new file mode 100644 index 0000000..008f623 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_flush.c @@ -0,0 +1,64 @@ + +/** + * called from intel_batchbuffer_flush and children before sending a + * batchbuffer off. + */ +static void brw_finish_batch(struct intel_context *intel) +{ + struct brw_context *brw = brw_context(&intel->ctx); + brw_emit_query_end(brw); +} + + +/** + * called from intelFlushBatchLocked + */ +static void brw_new_batch( struct intel_context *intel ) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + /* Check that we didn't just wrap our batchbuffer at a bad time. */ + assert(!brw->no_batch_wrap); + + brw->curbe.need_new_bo = GL_TRUE; + + /* Mark all context state as needing to be re-emitted. + * This is probably not as severe as on 915, since almost all of our state + * is just in referenced buffers. + */ + brw->state.dirty.brw |= BRW_NEW_CONTEXT; + + brw->state.dirty.mesa |= ~0; + brw->state.dirty.brw |= ~0; + brw->state.dirty.cache |= ~0; + + /* Move to the end of the current upload buffer so that we'll force choosing + * a new buffer next time. + */ + if (brw->vb.upload.bo != NULL) { + dri_bo_unreference(brw->vb.upload.bo); + brw->vb.upload.bo = NULL; + brw->vb.upload.offset = 0; + } +} + + +static void brw_note_fence( struct intel_context *intel, GLuint fence ) +{ + brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE; +} + +/* called from intelWaitForIdle() and intelFlush() + * + * For now, just flush everything. Could be smarter later. + */ +static GLuint brw_flush_cmd( void ) +{ + struct brw_mi_flush flush; + flush.opcode = CMD_MI_FLUSH; + flush.pad = 0; + flush.flags = BRW_FLUSH_STATE_CACHE; + return *(GLuint *)&flush; +} + + diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c new file mode 100644 index 0000000..d199d0b --- /dev/null +++ b/src/gallium/drivers/i965/brw_screen_surface.c @@ -0,0 +1,27 @@ + /* _NEW_BUFFERS */ + if (IS_965(intel->intelScreen->deviceID) && + !IS_G4X(intel->intelScreen->deviceID)) { + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + /* The original gen4 hardware couldn't set up WM surfaces pointing + * at an offset within a tile, which can happen when rendering to + * anything but the base level of a texture or the +X face/0 depth. + * This was fixed with the 4 Series hardware. + * + * For these original chips, you would have to make the depth and + * color destination surfaces include information on the texture + * type, LOD, face, and various limits to use them as a destination. + * I would have done this, but there's also a nasty requirement that + * the depth and the color surfaces all be of the same LOD, which + * may be a worse requirement than this alignment. (Also, we may + * want to just demote the texture to untiled, instead). + */ + if (irb->region && + irb->region->tiling != I915_TILING_NONE && + (irb->region->draw_offset & 4095)) { + DBG("FALLBACK: non-tile-aligned destination for tiled FBO\n"); + return GL_TRUE; + } + } diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index e1c2c77..9051324 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -59,9 +59,9 @@ static void compile_sf_prog( struct brw_context *brw, brw_init_compile(brw, &c.func); c.key = *key; - c.nr_attrs = brw_count_bits(c.key.attrs); + c.nr_attrs = util_count_bits(c.key.attrs); c.nr_attr_regs = (c.nr_attrs+1)/2; - c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS); + c.nr_setup_attrs = util_count_bits(c.key.attrs & DO_SETUP_BITS); c.nr_setup_regs = (c.nr_setup_attrs+1)/2; c.prog_data.urb_read_length = c.nr_attr_regs; diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c index ca8f97f..4cc427a 100644 --- a/src/gallium/drivers/i965/brw_sf_emit.c +++ b/src/gallium/drivers/i965/brw_sf_emit.c @@ -150,7 +150,7 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; struct brw_reg ip = brw_ip_reg(); - GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); + GLuint nr = util_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); GLuint jmpi = 1; if (!nr) @@ -188,7 +188,7 @@ static void do_flatshade_line( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; struct brw_reg ip = brw_ip_reg(); - GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); + GLuint nr = util_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); GLuint jmpi = 1; if (!nr) diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c index b817b74..6801084 100644 --- a/src/gallium/drivers/i965/brw_state_upload.c +++ b/src/gallium/drivers/i965/brw_state_upload.c @@ -270,7 +270,7 @@ brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits) /*********************************************************************** * Emit all state: */ -void brw_validate_state( struct brw_context *brw ) +enum pipe_error brw_validate_state( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; @@ -278,10 +278,6 @@ void brw_validate_state( struct brw_context *brw ) GLuint i; brw_clear_validated_bos(brw); - - state->mesa |= brw->intel.NewGLState; - brw->intel.NewGLState = 0; - brw_add_validated_bo(brw, intel->batch->buf); if (brw->emit_state_always) { @@ -290,36 +286,23 @@ void brw_validate_state( struct brw_context *brw ) state->cache |= ~0; } - if (brw->fragment_program != ctx->FragmentProgram._Current) { - brw->fragment_program = ctx->FragmentProgram._Current; - brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; - } - - if (brw->vertex_program != ctx->VertexProgram._Current) { - brw->vertex_program = ctx->VertexProgram._Current; - brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; - } - if (state->mesa == 0 && state->cache == 0 && state->brw == 0) - return; + return 0; if (brw->state.dirty.brw & BRW_NEW_CONTEXT) brw_clear_batch_cache(brw); - brw->intel.Fallback = 0; - /* do prepare stage for all atoms */ for (i = 0; i < Elements(atoms); i++) { const struct brw_tracked_state *atom = atoms[i]; - if (brw->intel.Fallback) - break; - if (check_state(state, &atom->dirty)) { if (atom->prepare) { - atom->prepare(brw); + ret = atom->prepare(brw); + if (ret) + return ret; } } } @@ -329,17 +312,18 @@ void brw_validate_state( struct brw_context *brw ) * If this fails, we can experience GPU lock-ups. */ { - const struct brw_fragment_program *fp; - fp = brw_fragment_program_const(brw->fragment_program); + const struct brw_fragment_program *fp = brw->fragment_program; if (fp) { - assert((fp->tex_units_used & ctx->Texture._EnabledUnits) - == fp->tex_units_used); + assert(fp->info.max_sampler <= brw->nr_samplers && + fp->info.max_texture <= brw->nr_textures); } } + + return 0; } -void brw_upload_state(struct brw_context *brw) +enum pipe_error brw_upload_state(struct brw_context *brw) { struct brw_state_flags *state = &brw->state.dirty; int i; @@ -356,7 +340,7 @@ void brw_upload_state(struct brw_context *brw) _mesa_memset(&examined, 0, sizeof(examined)); prev = *state; - for (i = 0; i < Elements(atoms); i++) { + for (i = 0; i < Elements(atoms); i++) { const struct brw_tracked_state *atom = atoms[i]; struct brw_state_flags generated; @@ -364,12 +348,11 @@ void brw_upload_state(struct brw_context *brw) atom->dirty.brw || atom->dirty.cache); - if (brw->intel.Fallback) - break; - if (check_state(state, &atom->dirty)) { if (atom->emit) { - atom->emit( brw ); + ret = atom->emit( brw ); + if (ret) + return ret; } } @@ -388,12 +371,11 @@ void brw_upload_state(struct brw_context *brw) for (i = 0; i < Elements(atoms); i++) { const struct brw_tracked_state *atom = atoms[i]; - if (brw->intel.Fallback) - break; - if (check_state(state, &atom->dirty)) { if (atom->emit) { - atom->emit( brw ); + ret = atom->emit( brw ); + if (ret) + return ret; } } } @@ -407,10 +389,11 @@ void brw_upload_state(struct brw_context *brw) brw_print_dirty_count(mesa_bits, state->mesa); brw_print_dirty_count(brw_bits, state->brw); brw_print_dirty_count(cache_bits, state->cache); - fprintf(stderr, "\n"); + debug_printf("\n"); } } - - if (!brw->intel.Fallback) - memset(state, 0, sizeof(*state)); + + /* Clear dirty flags: + */ + memset(state, 0, sizeof(*state)); } diff --git a/src/gallium/drivers/i965/brw_swtnl.c b/src/gallium/drivers/i965/brw_swtnl.c new file mode 100644 index 0000000..6684f44 --- /dev/null +++ b/src/gallium/drivers/i965/brw_swtnl.c @@ -0,0 +1,114 @@ + +/* XXX: could split the primitive list to fallback only on the + * non-conformant primitives. + */ +static GLboolean check_fallbacks( struct brw_context *brw, + const struct _mesa_prim *prim, + GLuint nr_prims ) +{ + GLcontext *ctx = &brw->intel.ctx; + GLuint i; + + /* If we don't require strict OpenGL conformance, never + * use fallbacks. If we're forcing fallbacks, always + * use fallfacks. + */ + if (brw->intel.conformance_mode == 0) + return GL_FALSE; + + if (brw->intel.conformance_mode == 2) + return GL_TRUE; + + if (ctx->Polygon.SmoothFlag) { + for (i = 0; i < nr_prims; i++) + if (reduced_prim[prim[i].mode] == GL_TRIANGLES) + return GL_TRUE; + } + + /* BRW hardware will do AA lines, but they are non-conformant it + * seems. TBD whether we keep this fallback: + */ + if (ctx->Line.SmoothFlag) { + for (i = 0; i < nr_prims; i++) + if (reduced_prim[prim[i].mode] == GL_LINES) + return GL_TRUE; + } + + /* Stipple -- these fallbacks could be resolved with a little + * bit of work? + */ + if (ctx->Line.StippleFlag) { + for (i = 0; i < nr_prims; i++) { + /* GS doesn't get enough information to know when to reset + * the stipple counter?!? + */ + if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP) + return GL_TRUE; + + if (prim[i].mode == GL_POLYGON && + (ctx->Polygon.FrontMode == GL_LINE || + ctx->Polygon.BackMode == GL_LINE)) + return GL_TRUE; + } + } + + if (ctx->Point.SmoothFlag) { + for (i = 0; i < nr_prims; i++) + if (prim[i].mode == GL_POINTS) + return GL_TRUE; + } + + /* BRW hardware doesn't handle GL_CLAMP texturing correctly; + * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP + * as GL_CLAMP_TO_EDGE instead. If we're using GL_CLAMP, and + * we want strict conformance, force the fallback. + * Right now, we only do this for 2D textures. + */ + { + int u; + for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) { + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u]; + if (texUnit->Enabled) { + if (texUnit->Enabled & TEXTURE_1D_BIT) { + if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) { + return GL_TRUE; + } + } + if (texUnit->Enabled & TEXTURE_2D_BIT) { + if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) { + return GL_TRUE; + } + } + if (texUnit->Enabled & TEXTURE_3D_BIT) { + if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) { + return GL_TRUE; + } + } + } + } + } + + /* Exceeding hw limits on number of VS inputs? + */ + if (brw->nr_ve == 0 || + brw->nr_ve >= BRW_VEP_MAX) { + return TRUE; + } + + /* Position array with zero stride? + */ + if (brw->vs[brw->ve[0]]->stride == 0) + return TRUE; + + + + /* Nothing stopping us from the fast path now */ + return GL_FALSE; +} + + + + diff --git a/src/gallium/drivers/i965/brw_types.h b/src/gallium/drivers/i965/brw_types.h new file mode 100644 index 0000000..32b6284 --- /dev/null +++ b/src/gallium/drivers/i965/brw_types.h @@ -0,0 +1,11 @@ +#ifndef BRW_TYPES_H +#define BRW_TYPES_H + +typedef GLuint uint32_t; +typedef GLubyte uint8_t; +typedef GLushort uint16_t; +/* no GLenum, translate all away */ + +typedef GLboolean uint8_t; + +#endif diff --git a/src/gallium/drivers/i965/brw_util.c b/src/gallium/drivers/i965/brw_util.c index ce21aa4..17f671a 100644 --- a/src/gallium/drivers/i965/brw_util.c +++ b/src/gallium/drivers/i965/brw_util.c @@ -35,14 +35,6 @@ #include "brw_util.h" #include "brw_defines.h" -GLuint brw_count_bits( GLuint val ) -{ - GLuint i; - for (i = 0; val ; val >>= 1) - if (val & 1) - i++; - return i; -} GLuint brw_translate_blend_equation( GLenum mode ) diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c index f0c79ef..53a5560 100644 --- a/src/gallium/drivers/i965/brw_vs.c +++ b/src/gallium/drivers/i965/brw_vs.c @@ -61,9 +61,7 @@ static void do_vs_prog( struct brw_context *brw, } if (0) - _mesa_print_program(&c.vp->program.Base); - - + tgsi_dump(&c.vp->tokens, 0); /* Emit GEN4 code. */ @@ -96,9 +94,9 @@ static void brw_upload_vs_prog(struct brw_context *brw) * the inputs it asks for, whether they are varying or not. */ key.program_string_id = vp->id; - key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); - key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || - ctx->Polygon.BackMode != GL_FILL); + key.nr_userclip = brw->nr_userclip; + key.copy_edgeflag = (brw->rast->fill_ccw != PIPE_POLYGON_MODE_FILL || + brw->rast->fill_cw != PIPE_POLYGON_MODE_FILL); /* Make an early check for the key. */ @@ -116,7 +114,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) */ const struct brw_tracked_state brw_vs_prog = { .dirty = { - .mesa = _NEW_TRANSFORM | _NEW_POLYGON, + .mesa = PIPE_NEW_UCP | PIPE_NEW_RAST, .brw = BRW_NEW_VERTEX_PROGRAM, .cache = 0 }, diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 1638ef8..7f20c4b 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -33,7 +33,7 @@ #include "main/macros.h" #include "shader/program.h" #include "shader/prog_parameter.h" -#include "shader/prog_print.h" +#include "pipe/p_shader_tokens.h" #include "brw_context.h" #include "brw_vs.h" @@ -129,6 +129,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) reg++; } } + /* If there are no inputs, we'll still be reading one attribute's worth * because it's required -- see urb_read_length setting. */ @@ -226,6 +227,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) * vertex urb, so is half the amount: */ c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2; + /* Setting this field to 0 leads to undefined behavior according to the * the VS_STATE docs. Our VUEs will always have at least one attribute * sitting in them, even if it's padding. @@ -960,9 +962,6 @@ static void emit_arl( struct brw_vs_compile *c, /** * Return the brw reg for the given instruction's src argument. - * Will return mangled results for SWZ op. The emit_swz() function - * ignores this result and recalculates taking extended swizzles into - * account. */ static struct brw_reg get_arg( struct brw_vs_compile *c, const struct prog_instruction *inst, @@ -1024,74 +1023,6 @@ static struct brw_reg get_dst( struct brw_vs_compile *c, } -static void emit_swz( struct brw_vs_compile *c, - struct brw_reg dst, - const struct prog_instruction *inst) -{ - const GLuint argIndex = 0; - const struct prog_src_register src = inst->SrcReg[argIndex]; - struct brw_compile *p = &c->func; - GLuint zeros_mask = 0; - GLuint ones_mask = 0; - GLuint src_mask = 0; - GLubyte src_swz[4]; - GLboolean need_tmp = (src.Negate && - dst.file != BRW_GENERAL_REGISTER_FILE); - struct brw_reg tmp = dst; - GLuint i; - - if (need_tmp) - tmp = get_tmp(c); - - for (i = 0; i < 4; i++) { - if (dst.dw1.bits.writemask & (1<<i)) { - GLubyte s = GET_SWZ(src.Swizzle, i); - switch (s) { - case SWIZZLE_X: - case SWIZZLE_Y: - case SWIZZLE_Z: - case SWIZZLE_W: - src_mask |= 1<<i; - src_swz[i] = s; - break; - case SWIZZLE_ZERO: - zeros_mask |= 1<<i; - break; - case SWIZZLE_ONE: - ones_mask |= 1<<i; - break; - } - } - } - - /* Do src first, in case dst aliases src: - */ - if (src_mask) { - struct brw_reg arg0; - - arg0 = get_src_reg(c, inst, argIndex); - - arg0 = brw_swizzle(arg0, - src_swz[0], src_swz[1], - src_swz[2], src_swz[3]); - - brw_MOV(p, brw_writemask(tmp, src_mask), arg0); - } - - if (zeros_mask) - brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0)); - - if (ones_mask) - brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1)); - - if (src.Negate) - brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp)); - - if (need_tmp) { - brw_MOV(p, dst, tmp); - release_tmp(c, tmp); - } -} /** @@ -1332,20 +1263,6 @@ void brw_vs_emit(struct brw_vs_compile *c ) brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_access_mode(p, BRW_ALIGN_16); - /* Message registers can't be read, so copy the output into GRF register - if they are used in source registers */ - for (insn = 0; insn < nr_insns; insn++) { - GLuint i; - struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; - for (i = 0; i < 3; i++) { - struct prog_src_register *src = &inst->SrcReg[i]; - GLuint index = src->Index; - GLuint file = src->File; - if (file == PROGRAM_OUTPUT && index != VERT_RESULT_HPOS) - c->output_regs[index].used_in_src = GL_TRUE; - } - } - /* Static register allocation */ brw_vs_alloc_regs(c); @@ -1362,18 +1279,14 @@ void brw_vs_emit(struct brw_vs_compile *c ) _mesa_print_instruction(inst); #endif - /* Get argument regs. SWZ is special and does this itself. + /* Get argument regs. */ - if (inst->Opcode != OPCODE_SWZ) - for (i = 0; i < 3; i++) { - const struct prog_src_register *src = &inst->SrcReg[i]; - index = src->Index; - file = src->File; - if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) - args[i] = c->output_regs[index].reg; - else - args[i] = get_arg(c, inst, i); - } + for (i = 0; i < 3; i++) { + const struct prog_src_register *src = &inst->SrcReg[i]; + index = src->Index; + file = src->File; + args[i] = get_arg(c, inst, i); + } /* Get dest regs. Note that it is possible for a reg to be both * dst and arg, given the static allocation of registers. So @@ -1381,10 +1294,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) */ index = inst->DstReg.Index; file = inst->DstReg.File; - if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) - dst = c->output_regs[index].reg; - else - dst = get_dst(c, inst->DstReg); + dst = get_dst(c, inst->DstReg); if (inst->SaturateMode != SATURATE_OFF) { _mesa_problem(NULL, "Unsupported saturate %d in vertex shader", @@ -1392,151 +1302,144 @@ void brw_vs_emit(struct brw_vs_compile *c ) } switch (inst->Opcode) { - case OPCODE_ABS: + case TGSI_OPCODE_ABS: brw_MOV(p, dst, brw_abs(args[0])); break; - case OPCODE_ADD: + case TGSI_OPCODE_ADD: brw_ADD(p, dst, args[0], args[1]); break; - case OPCODE_COS: + case TGSI_OPCODE_COS: emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL); break; - case OPCODE_DP3: + case TGSI_OPCODE_DP3: brw_DP3(p, dst, args[0], args[1]); break; - case OPCODE_DP4: + case TGSI_OPCODE_DP4: brw_DP4(p, dst, args[0], args[1]); break; - case OPCODE_DPH: + case TGSI_OPCODE_DPH: brw_DPH(p, dst, args[0], args[1]); break; - case OPCODE_NRM3: + case TGSI_OPCODE_NRM3: emit_nrm(c, dst, args[0], 3); break; - case OPCODE_NRM4: + case TGSI_OPCODE_NRM4: emit_nrm(c, dst, args[0], 4); break; - case OPCODE_DST: + case TGSI_OPCODE_DST: unalias2(c, dst, args[0], args[1], emit_dst_noalias); break; - case OPCODE_EXP: + case TGSI_OPCODE_EXP: unalias1(c, dst, args[0], emit_exp_noalias); break; - case OPCODE_EX2: + case TGSI_OPCODE_EX2: emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL); break; - case OPCODE_ARL: + case TGSI_OPCODE_ARL: emit_arl(c, dst, args[0]); break; - case OPCODE_FLR: + case TGSI_OPCODE_FLR: brw_RNDD(p, dst, args[0]); break; - case OPCODE_FRC: + case TGSI_OPCODE_FRC: brw_FRC(p, dst, args[0]); break; - case OPCODE_LOG: + case TGSI_OPCODE_LOG: unalias1(c, dst, args[0], emit_log_noalias); break; - case OPCODE_LG2: + case TGSI_OPCODE_LG2: emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL); break; - case OPCODE_LIT: + case TGSI_OPCODE_LIT: unalias1(c, dst, args[0], emit_lit_noalias); break; - case OPCODE_LRP: + case TGSI_OPCODE_LRP: unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias); break; - case OPCODE_MAD: + case TGSI_OPCODE_MAD: brw_MOV(p, brw_acc_reg(), args[2]); brw_MAC(p, dst, args[0], args[1]); break; - case OPCODE_MAX: + case TGSI_OPCODE_MAX: emit_max(p, dst, args[0], args[1]); break; - case OPCODE_MIN: + case TGSI_OPCODE_MIN: emit_min(p, dst, args[0], args[1]); break; - case OPCODE_MOV: + case TGSI_OPCODE_MOV: brw_MOV(p, dst, args[0]); break; - case OPCODE_MUL: + case TGSI_OPCODE_MUL: brw_MUL(p, dst, args[0], args[1]); break; - case OPCODE_POW: + case TGSI_OPCODE_POW: emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL); break; - case OPCODE_RCP: + case TGSI_OPCODE_RCP: emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL); break; - case OPCODE_RSQ: + case TGSI_OPCODE_RSQ: emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL); break; - - case OPCODE_SEQ: + case TGSI_OPCODE_SEQ: emit_seq(p, dst, args[0], args[1]); break; - case OPCODE_SIN: + case TGSI_OPCODE_SIN: emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL); break; - case OPCODE_SNE: + case TGSI_OPCODE_SNE: emit_sne(p, dst, args[0], args[1]); break; - case OPCODE_SGE: + case TGSI_OPCODE_SGE: emit_sge(p, dst, args[0], args[1]); break; - case OPCODE_SGT: + case TGSI_OPCODE_SGT: emit_sgt(p, dst, args[0], args[1]); break; - case OPCODE_SLT: + case TGSI_OPCODE_SLT: emit_slt(p, dst, args[0], args[1]); break; - case OPCODE_SLE: + case TGSI_OPCODE_SLE: emit_sle(p, dst, args[0], args[1]); break; - case OPCODE_SUB: + case TGSI_OPCODE_SUB: brw_ADD(p, dst, args[0], negate(args[1])); break; - case OPCODE_SWZ: - /* The args[0] value can't be used here as it won't have - * correctly encoded the full swizzle: - */ - emit_swz(c, dst, inst); - break; - case OPCODE_TRUNC: + case TGSI_OPCODE_TRUNC: /* round toward zero */ brw_RNDZ(p, dst, args[0]); break; - case OPCODE_XPD: + case TGSI_OPCODE_XPD: emit_xpd(p, dst, args[0], args[1]); break; - case OPCODE_IF: + case TGSI_OPCODE_IF: assert(if_depth < MAX_IF_DEPTH); if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8); /* Note that brw_IF smashes the predicate_control field. */ if_inst[if_depth]->header.predicate_control = get_predicate(inst); if_depth++; break; - case OPCODE_ELSE: + case TGSI_OPCODE_ELSE: if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); break; - case OPCODE_ENDIF: + case TGSI_OPCODE_ENDIF: assert(if_depth > 0); brw_ENDIF(p, if_inst[--if_depth]); break; - case OPCODE_BGNLOOP: + case TGSI_OPCODE_BGNLOOP: loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); break; - case OPCODE_BRK: + case TGSI_OPCODE_BRK: brw_set_predicate_control(p, get_predicate(inst)); brw_BREAK(p); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; - case OPCODE_CONT: + case TGSI_OPCODE_CONT: brw_set_predicate_control(p, get_predicate(inst)); brw_CONT(p); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; - case OPCODE_ENDLOOP: + case TGSI_OPCODE_ENDLOOP: { struct brw_instruction *inst0, *inst1; GLuint br = 1; @@ -1550,23 +1453,23 @@ void brw_vs_emit(struct brw_vs_compile *c ) /* patch all the BREAK/CONT instructions from last BEGINLOOP */ while (inst0 > loop_inst[loop_depth]) { inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK) { + if (inst0->header.opcode == BRW_TGSI_OPCODE_BREAK) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); inst0->bits3.if_else.pop_count = 0; } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + else if (inst0->header.opcode == BRW_TGSI_OPCODE_CONTINUE) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0); inst0->bits3.if_else.pop_count = 0; } } } break; - case OPCODE_BRA: + case TGSI_OPCODE_BRA: brw_set_predicate_control(p, get_predicate(inst)); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; - case OPCODE_CAL: + case TGSI_OPCODE_CAL: brw_set_access_mode(p, BRW_ALIGN_1); brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); brw_set_access_mode(p, BRW_ALIGN_16); @@ -1575,27 +1478,27 @@ void brw_vs_emit(struct brw_vs_compile *c ) brw_save_call(p, inst->Comment, p->nr_insn); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); break; - case OPCODE_RET: + case TGSI_OPCODE_RET: brw_ADD(p, get_addr_reg(stack_index), get_addr_reg(stack_index), brw_imm_d(-4)); brw_set_access_mode(p, BRW_ALIGN_1); brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0)); brw_set_access_mode(p, BRW_ALIGN_16); break; - case OPCODE_END: + case TGSI_OPCODE_END: end_offset = p->nr_insn; /* this instruction will get patched later to jump past subroutine * code, etc. */ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); break; - case OPCODE_PRINT: + case TGSI_OPCODE_PRINT: /* no-op */ break; - case OPCODE_BGNSUB: + case TGSI_OPCODE_BGNSUB: brw_save_label(p, inst->Comment, p->nr_insn); break; - case OPCODE_ENDSUB: + case TGSI_OPCODE_ENDSUB: /* no-op */ break; default: @@ -1618,33 +1521,6 @@ void brw_vs_emit(struct brw_vs_compile *c ) hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ; } - if ((inst->DstReg.File == PROGRAM_OUTPUT) - && (inst->DstReg.Index != VERT_RESULT_HPOS) - && c->output_regs[inst->DstReg.Index].used_in_src) { - brw_MOV(p, get_dst(c, inst->DstReg), dst); - } - - /* Result color clamping. - * - * When destination register is an output register and - * it's primary/secondary front/back color, we have to clamp - * the result to [0,1]. This is done by enabling the - * saturation bit for the last instruction. - * - * We don't use brw_set_saturate() as it modifies - * p->current->header.saturate, which affects all the subsequent - * instructions. Instead, we directly modify the header - * of the last (already stored) instruction. - */ - if (inst->DstReg.File == PROGRAM_OUTPUT) { - if ((inst->DstReg.Index == VERT_RESULT_COL0) - || (inst->DstReg.Index == VERT_RESULT_COL1) - || (inst->DstReg.Index == VERT_RESULT_BFC0) - || (inst->DstReg.Index == VERT_RESULT_BFC1)) { - p->store[p->nr_insn-1].header.saturate = 1; - } - } - release_tmps(c); } diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 2292de9..20d3188 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -269,61 +269,46 @@ static void brw_wm_populate_key( struct brw_context *brw, uses_depth, key); + /* Revisit this, figure out if it's really useful, and either push + * it into the state tracker so that everyone benefits (use to + * create fs varients with TEX rather than TXP), or discard. + */ + key->proj_attrib_mask = ~0; /*brw->wm.input_size_masks[4-1];*/ - /* BRW_NEW_WM_INPUT_DIMENSIONS */ - key->proj_attrib_mask = brw->wm.input_size_masks[4-1]; - - /* _NEW_LIGHT */ - key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); + /* PIPE_NEW_RAST */ + key->flat_shade = brw->rast.flat_shade; - /* _NEW_HINT */ - key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST); + /* This can be determined by looking at the INTERP mode each input decl. + */ + key->linear_color = 0; /* _NEW_TEXTURE */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - const struct gl_texture_unit *unit = &ctx->Texture.Unit[i]; - - if (unit->_ReallyEnabled) { - const struct gl_texture_object *t = unit->_Current; - const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; + if (i < brw->nr_textures) { + const struct gl_texture_unit *unit = &ctx->Texture.Unit[i]; + const struct gl_texture_object *t = unit->_Current; + const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; + if (img->InternalFormat == GL_YCBCR_MESA) { key->yuvtex_mask |= 1 << i; if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR) - key->yuvtex_swap_mask |= 1 << i; + key->yuvtex_swap_mask |= 1 << i; } - key->tex_swizzles[i] = t->_Swizzle; + key->tex_swizzles[i] = t->_Swizzle; + + if (0) + key->shadowtex_mask |= 1<<i; } else { key->tex_swizzles[i] = SWIZZLE_NOOP; } } - /* Shadow */ - key->shadowtex_mask = fp->program.Base.ShadowSamplers; - /* _NEW_BUFFERS */ - /* - * Include the draw buffer origin and height so that we can calculate - * fragment position values relative to the bottom left of the drawable, - * from the incoming screen origin relative position we get as part of our - * payload. - * - * We could avoid recompiling by including this as a constant referenced by - * our program, but if we were to do that it would also be nice to handle - * getting that constant updated at batchbuffer submit time (when we - * hold the lock and know where the buffer really is) rather than at emit - * time when we don't hold the lock and are just guessing. We could also - * just avoid using this as key data if the program doesn't use - * fragment.position. - * - * This pretty much becomes moot with DRI2 and redirected buffers anyway, - * as our origins will always be zero then. - */ + /* _NEW_FRAMEBUFFER */ if (brw->intel.driDrawable != NULL) { - key->origin_x = brw->intel.driDrawable->x; - key->origin_y = brw->intel.driDrawable->y; - key->drawable_height = brw->intel.driDrawable->h; + key->drawable_height = brw->fb.cbufs[0].height; } /* CACHE_NEW_VS_PROG */ diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 872b1f3..756a680 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -76,7 +76,6 @@ struct brw_wm_prog_key { GLuint tex_swizzles[BRW_MAX_TEX_UNIT]; GLuint program_string_id:32; - GLuint origin_x, origin_y; GLuint drawable_height; GLuint vp_outputs_written; }; diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index bf80a29..9c47c46 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -125,23 +125,21 @@ static void emit_wpos_xy(struct brw_wm_compile *c, { struct brw_compile *p = &c->func; - /* Calculate the pixel offset from window bottom left into destination - * X and Y channels. - */ if (mask & WRITEMASK_X) { - /* X' = X - origin */ - brw_ADD(p, + /* X' = X */ + brw_MOV(p, dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_W), - brw_imm_d(0 - c->key.origin_x)); + retype(arg0[0], BRW_REGISTER_TYPE_W)); } + /* XXX: is this needed any more, or is this a NOOP? + */ if (mask & WRITEMASK_Y) { - /* Y' = height - (Y - origin_y) = height + origin_y - Y */ + /* Y' = height - 1 - Y */ brw_ADD(p, dst[1], negate(retype(arg0[1], BRW_REGISTER_TYPE_W)), - brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); + brw_imm_d(c->key.drawable_height - 1)); } } @@ -1376,7 +1374,6 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case OPCODE_MOV: - case OPCODE_SWZ: emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); break; diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index 4e3edfb..5f47d86 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -30,25 +30,12 @@ */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" +#include "pipe/p_shader_constants.h" + #include "brw_context.h" #include "brw_wm.h" #include "brw_util.h" -#include "shader/prog_parameter.h" -#include "shader/prog_print.h" -#include "shader/prog_statevars.h" - - -/** An invalid texture target */ -#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS - -/** An invalid texture unit */ -#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT - -#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS #define X 0 #define Y 1 @@ -68,11 +55,6 @@ static const char *wm_opcode_strings[] = { "FRONTFACING", }; -#if 0 -static const char *wm_file_strings[] = { - "PAYLOAD" -}; -#endif /*********************************************************************** @@ -165,13 +147,13 @@ static struct prog_dst_register get_temp( struct brw_wm_compile *c ) } c->fp_temp |= 1<<(bit-1); - return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1)); + return dst_reg(PROGRAM_TEMPORARY, c->first_internal_temp+(bit-1)); } static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp ) { - c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP)); + c->fp_temp &= ~(1 << (temp.Index - c->first_internal_temp)); } @@ -192,58 +174,29 @@ static struct prog_instruction *emit_insn(struct brw_wm_compile *c, return inst; } -static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c, - GLuint op, - struct prog_dst_register dest, - GLuint saturate, - GLuint tex_src_unit, - GLuint tex_src_target, - GLuint tex_shadow, - struct prog_src_register src0, - struct prog_src_register src1, - struct prog_src_register src2 ) +static struct prog_instruction * emit_op(struct brw_wm_compile *c, + GLuint op, + struct prog_dst_register dest, + GLuint saturate, + struct prog_src_register src0, + struct prog_src_register src1, + struct prog_src_register src2 ) { struct prog_instruction *inst = get_fp_inst(c); - assert(tex_src_unit < BRW_MAX_TEX_UNIT || - tex_src_unit == TEX_UNIT_NONE); - assert(tex_src_target < NUM_TEXTURE_TARGETS || - tex_src_target == TEX_TARGET_NONE); - - /* update mask of which texture units are referenced by this program */ - if (tex_src_unit != TEX_UNIT_NONE) - c->fp->tex_units_used |= (1 << tex_src_unit); - memset(inst, 0, sizeof(*inst)); inst->Opcode = op; inst->DstReg = dest; inst->SaturateMode = saturate; - inst->TexSrcUnit = tex_src_unit; - inst->TexSrcTarget = tex_src_target; - inst->TexShadow = tex_shadow; inst->SrcReg[0] = src0; inst->SrcReg[1] = src1; inst->SrcReg[2] = src2; return inst; } - - -static struct prog_instruction * emit_op(struct brw_wm_compile *c, - GLuint op, - struct prog_dst_register dest, - GLuint saturate, - struct prog_src_register src0, - struct prog_src_register src1, - struct prog_src_register src2 ) -{ - return emit_tex_op(c, op, dest, saturate, - TEX_UNIT_NONE, TEX_TARGET_NONE, 0, /* unit, tgt, shadow */ - src0, src1, src2); -} -/* Many Mesa opcodes produce the same value across all the result channels. +/* Many opcodes produce the same value across all the result channels. * We'd rather not have to support that splatting in the opcode implementations, * and brw_wm_pass*.c wants to optimize them out by shuffling references around * anyway. We can easily get both by emitting the opcode to one channel, and @@ -267,7 +220,7 @@ static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c, other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan); if (other_channel_mask != 0) { inst = emit_op(c, - OPCODE_MOV, + TGSI_OPCODE_MOV, dst_mask(inst0->DstReg, other_channel_mask), 0, src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan), @@ -356,7 +309,9 @@ static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) } static void emit_interp( struct brw_wm_compile *c, - GLuint idx ) + GLuint semantic, + GLuint semantic_index, + GLuint interp_mode ) { struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); @@ -366,7 +321,7 @@ static void emit_interp( struct brw_wm_compile *c, * multiplied by 1/W in the SF program, and LINTERP on those * which have not: */ - switch (idx) { + switch (semantic) { case FRAG_ATTRIB_WPOS: /* Have to treat wpos.xy specially: */ @@ -390,8 +345,8 @@ static void emit_interp( struct brw_wm_compile *c, deltas, src_undef()); break; - case FRAG_ATTRIB_COL0: - case FRAG_ATTRIB_COL1: + + case TGSI_SEMANTIC_COLOR: if (c->key.flat_shade) { emit_op(c, WM_CINTERP, @@ -402,25 +357,13 @@ static void emit_interp( struct brw_wm_compile *c, src_undef()); } else { - if (c->key.linear_color) { - emit_op(c, - WM_LINTERP, - dst, - 0, - interp, - deltas, - src_undef()); - } - else { - /* perspective-corrected color interpolation */ - emit_op(c, - WM_PINTERP, - dst, - 0, - interp, - deltas, - get_pixel_w(c)); - } + emit_op(c, + translate_interp_mode(interp_mode), + dst, + 0, + interp, + deltas, + src_undef()); } break; case FRAG_ATTRIB_FOGC: @@ -434,7 +377,7 @@ static void emit_interp( struct brw_wm_compile *c, get_pixel_w(c)); emit_op(c, - OPCODE_MOV, + TGSI_OPCODE_MOV, dst_mask(dst, WRITEMASK_YZW), 0, src_swizzle(interp, @@ -468,7 +411,7 @@ static void emit_interp( struct brw_wm_compile *c, get_pixel_w(c)); emit_op(c, - OPCODE_MOV, + TGSI_OPCODE_MOV, dst_mask(dst, WRITEMASK_ZW), 0, src_swizzle(interp, @@ -482,7 +425,7 @@ static void emit_interp( struct brw_wm_compile *c, default: emit_op(c, - WM_PINTERP, + translate_interp_mode(interp_mode), dst, 0, interp, @@ -490,8 +433,6 @@ static void emit_interp( struct brw_wm_compile *c, get_pixel_w(c)); break; } - - c->fp_interp_emitted |= 1<<idx; } /*********************************************************************** @@ -581,7 +522,7 @@ static void precalc_dst( struct brw_wm_compile *c, /* dst.y = mul src0.y, src1.y */ emit_op(c, - OPCODE_MUL, + TGSI_OPCODE_MUL, dst_mask(dst, WRITEMASK_Y), inst->SaturateMode, src0, @@ -596,7 +537,7 @@ static void precalc_dst( struct brw_wm_compile *c, /* dst.xz = swz src0.1zzz */ swz = emit_op(c, - OPCODE_SWZ, + TGSI_OPCODE_MOV, dst_mask(dst, WRITEMASK_XZ), inst->SaturateMode, src_swizzle(src0, SWIZZLE_ONE, z, z, z), @@ -609,7 +550,7 @@ static void precalc_dst( struct brw_wm_compile *c, /* dst.w = mov src1.w */ emit_op(c, - OPCODE_MOV, + TGSI_OPCODE_MOV, dst_mask(dst, WRITEMASK_W), inst->SaturateMode, src1, @@ -631,7 +572,7 @@ static void precalc_lit( struct brw_wm_compile *c, /* dst.xw = swz src0.1111 */ swz = emit_op(c, - OPCODE_SWZ, + TGSI_OPCODE_MOV, dst_mask(dst, WRITEMASK_XW), 0, src_swizzle1(src0, SWIZZLE_ONE), @@ -643,7 +584,7 @@ static void precalc_lit( struct brw_wm_compile *c, if (dst.WriteMask & WRITEMASK_YZ) { emit_op(c, - OPCODE_LIT, + TGSI_OPCODE_LIT, dst_mask(dst, WRITEMASK_YZ), inst->SaturateMode, src0, @@ -681,7 +622,7 @@ static void precalc_tex( struct brw_wm_compile *c, coord = src_reg_from_dst(tmpcoord); /* tmpcoord = src0 (i.e.: coord = src0) */ - out = emit_op(c, OPCODE_MOV, + out = emit_op(c, TGSI_OPCODE_MOV, tmpcoord, 0, src0, @@ -691,7 +632,7 @@ static void precalc_tex( struct brw_wm_compile *c, out->SrcReg[0].Abs = 1; /* tmp0 = MAX(coord.X, coord.Y) */ - emit_op(c, OPCODE_MAX, + emit_op(c, TGSI_OPCODE_MAX, tmp0, 0, src_swizzle1(coord, X), @@ -699,7 +640,7 @@ static void precalc_tex( struct brw_wm_compile *c, src_undef()); /* tmp1 = MAX(tmp0, coord.Z) */ - emit_op(c, OPCODE_MAX, + emit_op(c, TGSI_OPCODE_MAX, tmp1, 0, tmp0src, @@ -707,7 +648,7 @@ static void precalc_tex( struct brw_wm_compile *c, src_undef()); /* tmp0 = 1 / tmp1 */ - emit_op(c, OPCODE_RCP, + emit_op(c, TGSI_OPCODE_RCP, dst_mask(tmp0, WRITEMASK_X), 0, tmp1src, @@ -715,7 +656,7 @@ static void precalc_tex( struct brw_wm_compile *c, src_undef()); /* tmpCoord = src0 * tmp0 */ - emit_op(c, OPCODE_MUL, + emit_op(c, TGSI_OPCODE_MUL, tmpcoord, 0, src0, @@ -738,7 +679,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height } */ emit_op(c, - OPCODE_MUL, + TGSI_OPCODE_MUL, tmpcoord, 0, inst->SrcReg[0], @@ -785,7 +726,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* tmp = TEX ... */ emit_tex_op(c, - OPCODE_TEX, + TGSI_OPCODE_TEX, tmp, inst->SaturateMode, unit, @@ -798,7 +739,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* tmp.xyz = ADD TMP, C0 */ emit_op(c, - OPCODE_ADD, + TGSI_OPCODE_ADD, dst_mask(tmp, WRITEMASK_XYZ), 0, tmpsrc, @@ -809,7 +750,7 @@ static void precalc_tex( struct brw_wm_compile *c, */ emit_op(c, - OPCODE_MUL, + TGSI_OPCODE_MUL, dst_mask(tmp, WRITEMASK_Y), 0, tmpsrc, @@ -824,7 +765,7 @@ static void precalc_tex( struct brw_wm_compile *c, */ emit_op(c, - OPCODE_MAD, + TGSI_OPCODE_MAD, dst_mask(dst, WRITEMASK_XYZ), 0, swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z), @@ -834,7 +775,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* RGB.y = MAD YUV.z, C1.w, RGB.y */ emit_op(c, - OPCODE_MAD, + TGSI_OPCODE_MAD, dst_mask(dst, WRITEMASK_Y), 0, src_swizzle1(tmpsrc, Z), @@ -846,7 +787,7 @@ static void precalc_tex( struct brw_wm_compile *c, else { /* ordinary RGBA tex instruction */ emit_tex_op(c, - OPCODE_TEX, + TGSI_OPCODE_TEX, inst->DstReg, inst->SaturateMode, unit, @@ -861,7 +802,7 @@ static void precalc_tex( struct brw_wm_compile *c, if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) { /* swizzle the result of the TEX instruction */ struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg); - emit_op(c, OPCODE_SWZ, + emit_op(c, TGSI_OPCODE_MOV, inst->DstReg, SATURATE_OFF, /* saturate already done above */ src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]), @@ -884,7 +825,7 @@ static GLboolean projtex( struct brw_wm_compile *c, const struct prog_src_register src = inst->SrcReg[0]; GLboolean retVal; - assert(inst->Opcode == OPCODE_TXP); + assert(inst->Opcode == TGSI_OPCODE_TXP); /* Only try to detect the simplest cases. Could detect (later) * cases where we are trying to emit code like RCP {1.0}, MUL x, @@ -921,7 +862,7 @@ static void precalc_txp( struct brw_wm_compile *c, /* tmp0.w = RCP inst.arg[0][3] */ emit_op(c, - OPCODE_RCP, + TGSI_OPCODE_RCP, dst_mask(tmp, WRITEMASK_W), 0, src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), @@ -931,7 +872,7 @@ static void precalc_txp( struct brw_wm_compile *c, /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww */ emit_op(c, - OPCODE_MUL, + TGSI_OPCODE_MUL, dst_mask(tmp, WRITEMASK_XYZ), 0, src0, @@ -1015,6 +956,7 @@ static void validate_src_regs( struct brw_wm_compile *c, GLuint idx = inst->SrcReg[i].Index; if (!(c->fp_interp_emitted & (1<<idx))) { emit_interp(c, idx); + c->fp_interp_emitted |= 1<<idx; } } } @@ -1094,71 +1036,64 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) */ switch (inst->Opcode) { - case OPCODE_SWZ: + case TGSI_OPCODE_ABS: out = emit_insn(c, inst); - out->Opcode = OPCODE_MOV; - break; - - case OPCODE_ABS: - out = emit_insn(c, inst); - out->Opcode = OPCODE_MOV; + out->Opcode = TGSI_OPCODE_MOV; out->SrcReg[0].Negate = NEGATE_NONE; out->SrcReg[0].Abs = 1; break; - case OPCODE_SUB: + case TGSI_OPCODE_SUB: out = emit_insn(c, inst); - out->Opcode = OPCODE_ADD; + out->Opcode = TGSI_OPCODE_ADD; out->SrcReg[1].Negate ^= NEGATE_XYZW; break; - case OPCODE_SCS: + case TGSI_OPCODE_SCS: out = emit_insn(c, inst); /* This should probably be done in the parser. */ out->DstReg.WriteMask &= WRITEMASK_XY; break; - case OPCODE_DST: + case TGSI_OPCODE_DST: precalc_dst(c, inst); break; - case OPCODE_LIT: + case TGSI_OPCODE_LIT: precalc_lit(c, inst); break; - case OPCODE_TEX: + case TGSI_OPCODE_TEX: precalc_tex(c, inst); break; - case OPCODE_TXP: + case TGSI_OPCODE_TXP: precalc_txp(c, inst); break; - case OPCODE_TXB: + case TGSI_OPCODE_TXB: out = emit_insn(c, inst); out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit]; assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT); break; - case OPCODE_XPD: + case TGSI_OPCODE_XPD: out = emit_insn(c, inst); /* This should probably be done in the parser. */ out->DstReg.WriteMask &= WRITEMASK_XYZ; break; - case OPCODE_KIL: + case TGSI_OPCODE_KIL: out = emit_insn(c, inst); /* This should probably be done in the parser. */ out->DstReg.WriteMask = 0; break; - case OPCODE_END: + case TGSI_OPCODE_END: emit_fb_write(c); break; - case OPCODE_PRINT: - break; default: if (brw_wm_is_scalar_result(inst->Opcode)) emit_scalar_insn(c, inst); diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c index c9fe1dd..d836e2f 100644 --- a/src/gallium/drivers/i965/brw_wm_glsl.c +++ b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -6,9 +6,6 @@ #include "brw_eu.h" #include "brw_wm.h" -enum _subroutine { - SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4 -}; static struct brw_reg get_dst_reg(struct brw_wm_compile *c, const struct prog_instruction *inst, @@ -32,10 +29,6 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) case OPCODE_CAL: case OPCODE_BRK: case OPCODE_RET: - case OPCODE_NOISE1: - case OPCODE_NOISE2: - case OPCODE_NOISE3: - case OPCODE_NOISE4: case OPCODE_BGNLOOP: return GL_TRUE; default: @@ -1495,1036 +1488,7 @@ static INLINE struct brw_reg odd_bytes( struct brw_reg reg ) 0, 16, 2 ); } -/* One-, two- and three-dimensional Perlin noise, similar to the description - in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */ -static void noise1_sub( struct brw_wm_compile *c ) { - struct brw_compile *p = &c->func; - struct brw_reg param, - x0, x1, /* gradients at each end */ - t, tmp[ 2 ], /* float temporaries */ - itmp[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */ - int i; - int mark = mark_tmps( c ); - - x0 = alloc_tmp( c ); - x1 = alloc_tmp( c ); - t = alloc_tmp( c ); - tmp[ 0 ] = alloc_tmp( c ); - tmp[ 1 ] = alloc_tmp( c ); - itmp[ 0 ] = retype( tmp[ 0 ], BRW_REGISTER_TYPE_UD ); - itmp[ 1 ] = retype( tmp[ 1 ], BRW_REGISTER_TYPE_UD ); - itmp[ 2 ] = retype( x0, BRW_REGISTER_TYPE_UD ); - itmp[ 3 ] = retype( x1, BRW_REGISTER_TYPE_UD ); - itmp[ 4 ] = retype( t, BRW_REGISTER_TYPE_UD ); - - param = lookup_tmp( c, mark - 2 ); - - brw_set_access_mode( p, BRW_ALIGN_1 ); - - brw_MOV( p, itmp[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */ - - /* Arrange the two end coordinates into scalars (itmp0/itmp1) to - be hashed. Also compute the remainder (offset within the unit - length), interleaved to reduce register dependency penalties. */ - brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param ); - brw_FRC( p, param, param ); - brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 1 ) ); - brw_MOV( p, itmp[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */ - brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */ - - /* We're now ready to perform the hashing. The two hashes are - interleaved for performance. The hash function used is - designed to rapidly achieve avalanche and require only 32x16 - bit multiplication, and 16-bit swizzles (which we get for - free). We can't use immediate operands in the multiplies, - because immediates are permitted only in src1 and the 16-bit - factor is permitted only in src0. */ - for( i = 0; i < 2; i++ ) - brw_MUL( p, itmp[ i ], itmp[ 2 ], itmp[ i ] ); - for( i = 0; i < 2; i++ ) - brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), - high_words( itmp[ i ] ) ); - for( i = 0; i < 2; i++ ) - brw_MUL( p, itmp[ i ], itmp[ 3 ], itmp[ i ] ); - for( i = 0; i < 2; i++ ) - brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), - high_words( itmp[ i ] ) ); - for( i = 0; i < 2; i++ ) - brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] ); - for( i = 0; i < 2; i++ ) - brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), - high_words( itmp[ i ] ) ); - - /* Now we want to initialise the two gradients based on the - hashes. Format conversion from signed integer to float leaves - everything scaled too high by a factor of pow( 2, 31 ), but - we correct for that right at the end. */ - brw_ADD( p, t, param, brw_imm_f( -1.0 ) ); - brw_MOV( p, x0, retype( tmp[ 0 ], BRW_REGISTER_TYPE_D ) ); - brw_MOV( p, x1, retype( tmp[ 1 ], BRW_REGISTER_TYPE_D ) ); - - brw_MUL( p, x0, x0, param ); - brw_MUL( p, x1, x1, t ); - - /* We interpolate between the gradients using the polynomial - 6t^5 - 15t^4 + 10t^3 (Perlin). */ - brw_MUL( p, tmp[ 0 ], param, brw_imm_f( 6.0 ) ); - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) ); - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) ); - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); - brw_ADD( p, x1, x1, negate( x0 ) ); /* unrelated work to fill the - pipeline */ - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); - brw_MUL( p, param, tmp[ 0 ], param ); - brw_MUL( p, x1, x1, param ); - brw_ADD( p, x0, x0, x1 ); - /* scale by pow( 2, -30 ), to compensate for the format conversion - above and an extra factor of 2 so that a single gradient covers - the [-1,1] range */ - brw_MUL( p, param, x0, brw_imm_f( 0.000000000931322574615478515625 ) ); - - release_tmps( c, mark ); -} - -static void emit_noise1( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - struct brw_compile *p = &c->func; - struct brw_reg src, param, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - int mark = mark_tmps( c ); - - assert( mark == 0 ); - - src = get_src_reg( c, inst, 0, 0 ); - - param = alloc_tmp( c ); - - brw_MOV( p, param, src ); - - invoke_subroutine( c, SUB_NOISE1, noise1_sub ); - - /* Fill in the result: */ - brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - brw_MOV( p, dst, param ); - } - } - if( inst->SaturateMode == SATURATE_ZERO_ONE ) - brw_set_saturate( p, 0 ); - - release_tmps( c, mark ); -} - -static void noise2_sub( struct brw_wm_compile *c ) { - - struct brw_compile *p = &c->func; - struct brw_reg param0, param1, - x0y0, x0y1, x1y0, x1y1, /* gradients at each corner */ - t, tmp[ 4 ], /* float temporaries */ - itmp[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */ - int i; - int mark = mark_tmps( c ); - - x0y0 = alloc_tmp( c ); - x0y1 = alloc_tmp( c ); - x1y0 = alloc_tmp( c ); - x1y1 = alloc_tmp( c ); - t = alloc_tmp( c ); - for( i = 0; i < 4; i++ ) { - tmp[ i ] = alloc_tmp( c ); - itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD ); - } - itmp[ 4 ] = retype( x0y0, BRW_REGISTER_TYPE_UD ); - itmp[ 5 ] = retype( x0y1, BRW_REGISTER_TYPE_UD ); - itmp[ 6 ] = retype( x1y0, BRW_REGISTER_TYPE_UD ); - - param0 = lookup_tmp( c, mark - 3 ); - param1 = lookup_tmp( c, mark - 2 ); - - brw_set_access_mode( p, BRW_ALIGN_1 ); - - /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to - be hashed. Also compute the remainders (offsets within the unit - square), interleaved to reduce register dependency penalties. */ - brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 ); - brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 ); - brw_FRC( p, param0, param0 ); - brw_FRC( p, param1, param1 ); - brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */ - brw_ADD( p, high_words( itmp[ 0 ] ), high_words( itmp[ 0 ] ), - low_words( itmp[ 1 ] ) ); - brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */ - brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */ - brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 0x10000 ) ); - brw_ADD( p, itmp[ 2 ], itmp[ 0 ], brw_imm_ud( 0x1 ) ); - brw_ADD( p, itmp[ 3 ], itmp[ 0 ], brw_imm_ud( 0x10001 ) ); - - /* We're now ready to perform the hashing. The four hashes are - interleaved for performance. The hash function used is - designed to rapidly achieve avalanche and require only 32x16 - bit multiplication, and 16-bit swizzles (which we get for - free). We can't use immediate operands in the multiplies, - because immediates are permitted only in src1 and the 16-bit - factor is permitted only in src0. */ - for( i = 0; i < 4; i++ ) - brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] ); - for( i = 0; i < 4; i++ ) - brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), - high_words( itmp[ i ] ) ); - for( i = 0; i < 4; i++ ) - brw_MUL( p, itmp[ i ], itmp[ 5 ], itmp[ i ] ); - for( i = 0; i < 4; i++ ) - brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), - high_words( itmp[ i ] ) ); - for( i = 0; i < 4; i++ ) - brw_MUL( p, itmp[ i ], itmp[ 6 ], itmp[ i ] ); - for( i = 0; i < 4; i++ ) - brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), - high_words( itmp[ i ] ) ); - - /* Now we want to initialise the four gradients based on the - hashes. Format conversion from signed integer to float leaves - everything scaled too high by a factor of pow( 2, 15 ), but - we correct for that right at the end. */ - brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); - brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); - brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); - brw_MOV( p, x1y0, low_words( tmp[ 2 ] ) ); - brw_MOV( p, x1y1, low_words( tmp[ 3 ] ) ); - - brw_MOV( p, tmp[ 0 ], high_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 1 ], high_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 2 ], high_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 3 ], high_words( tmp[ 3 ] ) ); - - brw_MUL( p, x1y0, x1y0, t ); - brw_MUL( p, x1y1, x1y1, t ); - brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); - brw_MUL( p, x0y0, x0y0, param0 ); - brw_MUL( p, x0y1, x0y1, param0 ); - - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param1 ); - brw_MUL( p, tmp[ 2 ], tmp[ 2 ], param1 ); - brw_MUL( p, tmp[ 1 ], tmp[ 1 ], t ); - brw_MUL( p, tmp[ 3 ], tmp[ 3 ], t ); - - brw_ADD( p, x0y0, x0y0, tmp[ 0 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 2 ] ); - brw_ADD( p, x0y1, x0y1, tmp[ 1 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 3 ] ); - - /* We interpolate between the gradients using the polynomial - 6t^5 - 15t^4 + 10t^3 (Perlin). */ - brw_MUL( p, tmp[ 0 ], param0, brw_imm_f( 6.0 ) ); - brw_MUL( p, tmp[ 1 ], param1, brw_imm_f( 6.0 ) ); - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) ); - brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( -15.0 ) ); - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); - brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); - brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work to fill the - pipeline */ - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) ); - brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( 10.0 ) ); - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); - brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); - brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work to fill the - pipeline */ - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); - brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); - brw_MUL( p, param0, tmp[ 0 ], param0 ); - brw_MUL( p, param1, tmp[ 1 ], param1 ); - - /* Here we interpolate in the y dimension... */ - brw_MUL( p, x0y1, x0y1, param1 ); - brw_MUL( p, x1y1, x1y1, param1 ); - brw_ADD( p, x0y0, x0y0, x0y1 ); - brw_ADD( p, x1y0, x1y0, x1y1 ); - - /* And now in x. There are horrible register dependencies here, - but we have nothing else to do. */ - brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); - brw_MUL( p, x1y0, x1y0, param0 ); - brw_ADD( p, x0y0, x0y0, x1y0 ); - - /* scale by pow( 2, -15 ), as described above */ - brw_MUL( p, param0, x0y0, brw_imm_f( 0.000030517578125 ) ); - - release_tmps( c, mark ); -} - -static void emit_noise2( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, param0, param1, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - int mark = mark_tmps( c ); - - assert( mark == 0 ); - - src0 = get_src_reg( c, inst, 0, 0 ); - src1 = get_src_reg( c, inst, 0, 1 ); - - param0 = alloc_tmp( c ); - param1 = alloc_tmp( c ); - - brw_MOV( p, param0, src0 ); - brw_MOV( p, param1, src1 ); - - invoke_subroutine( c, SUB_NOISE2, noise2_sub ); - - /* Fill in the result: */ - brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - brw_MOV( p, dst, param0 ); - } - } - if( inst->SaturateMode == SATURATE_ZERO_ONE ) - brw_set_saturate( p, 0 ); - - release_tmps( c, mark ); -} - -/** - * The three-dimensional case is much like the one- and two- versions above, - * but since the number of corners is rapidly growing we now pack 16 16-bit - * hashes into each register to extract more parallelism from the EUs. - */ -static void noise3_sub( struct brw_wm_compile *c ) { - - struct brw_compile *p = &c->func; - struct brw_reg param0, param1, param2, - x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */ - xi, yi, zi, /* interpolation coefficients */ - t, tmp[ 8 ], /* float temporaries */ - itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */ - wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */ - int i; - int mark = mark_tmps( c ); - - x0y0 = alloc_tmp( c ); - x0y1 = alloc_tmp( c ); - x1y0 = alloc_tmp( c ); - x1y1 = alloc_tmp( c ); - xi = alloc_tmp( c ); - yi = alloc_tmp( c ); - zi = alloc_tmp( c ); - t = alloc_tmp( c ); - for( i = 0; i < 8; i++ ) { - tmp[ i ] = alloc_tmp( c ); - itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD ); - wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 ); - } - - param0 = lookup_tmp( c, mark - 4 ); - param1 = lookup_tmp( c, mark - 3 ); - param2 = lookup_tmp( c, mark - 2 ); - - brw_set_access_mode( p, BRW_ALIGN_1 ); - - /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to - be hashed. Also compute the remainders (offsets within the unit - cube), interleaved to reduce register dependency penalties. */ - brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 ); - brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 ); - brw_RNDD( p, retype( itmp[ 2 ], BRW_REGISTER_TYPE_D ), param2 ); - brw_FRC( p, param0, param0 ); - brw_FRC( p, param1, param1 ); - brw_FRC( p, param2, param2 ); - /* Since we now have only 16 bits of precision in the hash, we must - be more careful about thorough mixing to maintain entropy as we - squash the input vector into a small scalar. */ - brw_MUL( p, brw_null_reg(), low_words( itmp[ 0 ] ), brw_imm_uw( 0xBC8F ) ); - brw_MAC( p, brw_null_reg(), low_words( itmp[ 1 ] ), brw_imm_uw( 0xD0BD ) ); - brw_MAC( p, low_words( itmp[ 0 ] ), low_words( itmp[ 2 ] ), - brw_imm_uw( 0x9B93 ) ); - brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ), - brw_imm_uw( 0xBC8F ) ); - - /* Temporarily disable the execution mask while we work with ExecSize=16 - channels (the mask is set for ExecSize=8 and is probably incorrect). - Although this might cause execution of unwanted channels, the code - writes only to temporary registers and has no side effects, so - disabling the mask is harmless. */ - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) ); - brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) ); - brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) ); - - /* We're now ready to perform the hashing. The eight hashes are - interleaved for performance. The hash function used is - designed to rapidly achieve avalanche and require only 16x16 - bit multiplication, and 8-bit swizzles (which we get for - free). */ - for( i = 0; i < 4; i++ ) - brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) ); - for( i = 0; i < 4; i++ ) - brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), - odd_bytes( wtmp[ i ] ) ); - for( i = 0; i < 4; i++ ) - brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) ); - for( i = 0; i < 4; i++ ) - brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), - odd_bytes( wtmp[ i ] ) ); - brw_pop_insn_state( p ); - - /* Now we want to initialise the four rear gradients based on the - hashes. Format conversion from signed integer to float leaves - everything scaled too high by a factor of pow( 2, 15 ), but - we correct for that right at the end. */ - /* x component */ - brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); - brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); - brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); - brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) ); - brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) ); - brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, x1y0, x1y0, t ); - brw_MUL( p, x1y1, x1y1, t ); - brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); - brw_MUL( p, x0y0, x0y0, param0 ); - brw_MUL( p, x0y1, x0y1, param0 ); - - /* y component */ - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) ); - brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); - brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 ); - - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - - /* z component */ - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); - - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param2 ); - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param2 ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param2 ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param2 ); - - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - - /* We interpolate between the gradients using the polynomial - 6t^5 - 15t^4 + 10t^3 (Perlin). */ - brw_MUL( p, xi, param0, brw_imm_f( 6.0 ) ); - brw_MUL( p, yi, param1, brw_imm_f( 6.0 ) ); - brw_MUL( p, zi, param2, brw_imm_f( 6.0 ) ); - brw_ADD( p, xi, xi, brw_imm_f( -15.0 ) ); - brw_ADD( p, yi, yi, brw_imm_f( -15.0 ) ); - brw_ADD( p, zi, zi, brw_imm_f( -15.0 ) ); - brw_MUL( p, xi, xi, param0 ); - brw_MUL( p, yi, yi, param1 ); - brw_MUL( p, zi, zi, param2 ); - brw_ADD( p, xi, xi, brw_imm_f( 10.0 ) ); - brw_ADD( p, yi, yi, brw_imm_f( 10.0 ) ); - brw_ADD( p, zi, zi, brw_imm_f( 10.0 ) ); - brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work */ - brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work */ - brw_MUL( p, xi, xi, param0 ); - brw_MUL( p, yi, yi, param1 ); - brw_MUL( p, zi, zi, param2 ); - brw_MUL( p, xi, xi, param0 ); - brw_MUL( p, yi, yi, param1 ); - brw_MUL( p, zi, zi, param2 ); - brw_MUL( p, xi, xi, param0 ); - brw_MUL( p, yi, yi, param1 ); - brw_MUL( p, zi, zi, param2 ); - - /* Here we interpolate in the y dimension... */ - brw_MUL( p, x0y1, x0y1, yi ); - brw_MUL( p, x1y1, x1y1, yi ); - brw_ADD( p, x0y0, x0y0, x0y1 ); - brw_ADD( p, x1y0, x1y0, x1y1 ); - - /* And now in x. Leave the result in tmp[ 0 ] (see below)... */ - brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); - brw_MUL( p, x1y0, x1y0, xi ); - brw_ADD( p, tmp[ 0 ], x0y0, x1y0 ); - - /* Now do the same thing for the front four gradients... */ - /* x component */ - brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) ); - brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) ); - brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) ); - brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) ); - brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, x1y0, x1y0, t ); - brw_MUL( p, x1y1, x1y1, t ); - brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); - brw_MUL( p, x0y0, x0y0, param0 ); - brw_MUL( p, x0y1, x0y1, param0 ); - - /* y component */ - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) ); - brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); - brw_ADD( p, t, param2, brw_imm_f( -1.0 ) ); - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 ); - - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - - /* z component */ - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); - - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); - - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - - /* The interpolation coefficients are still around from last time, so - again interpolate in the y dimension... */ - brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); - brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); - brw_MUL( p, x0y1, x0y1, yi ); - brw_MUL( p, x1y1, x1y1, yi ); - brw_ADD( p, x0y0, x0y0, x0y1 ); - brw_ADD( p, x1y0, x1y0, x1y1 ); - - /* And now in x. The rear face is in tmp[ 0 ] (see above), so this - time put the front face in tmp[ 1 ] and we're nearly there... */ - brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); - brw_MUL( p, x1y0, x1y0, xi ); - brw_ADD( p, tmp[ 1 ], x0y0, x1y0 ); - - /* The final interpolation, in the z dimension: */ - brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) ); - brw_MUL( p, tmp[ 1 ], tmp[ 1 ], zi ); - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] ); - - /* scale by pow( 2, -15 ), as described above */ - brw_MUL( p, param0, tmp[ 0 ], brw_imm_f( 0.000030517578125 ) ); - - release_tmps( c, mark ); -} - -static void emit_noise3( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, src2, param0, param1, param2, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - int mark = mark_tmps( c ); - - assert( mark == 0 ); - - src0 = get_src_reg( c, inst, 0, 0 ); - src1 = get_src_reg( c, inst, 0, 1 ); - src2 = get_src_reg( c, inst, 0, 2 ); - - param0 = alloc_tmp( c ); - param1 = alloc_tmp( c ); - param2 = alloc_tmp( c ); - - brw_MOV( p, param0, src0 ); - brw_MOV( p, param1, src1 ); - brw_MOV( p, param2, src2 ); - - invoke_subroutine( c, SUB_NOISE3, noise3_sub ); - - /* Fill in the result: */ - brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - brw_MOV( p, dst, param0 ); - } - } - if( inst->SaturateMode == SATURATE_ZERO_ONE ) - brw_set_saturate( p, 0 ); - - release_tmps( c, mark ); -} - -/** - * For the four-dimensional case, the little micro-optimisation benefits - * we obtain by unrolling all the loops aren't worth the massive bloat it - * now causes. Instead, we loop twice around performing a similar operation - * to noise3, once for the w=0 cube and once for the w=1, with a bit more - * code to glue it all together. - */ -static void noise4_sub( struct brw_wm_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_reg param[ 4 ], - x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */ - w0, /* noise for the w=0 cube */ - floors[ 2 ], /* integer coordinates of base corner of hypercube */ - interp[ 4 ], /* interpolation coefficients */ - t, tmp[ 8 ], /* float temporaries */ - itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */ - wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */ - int i, j; - int mark = mark_tmps( c ); - GLuint loop, origin; - - x0y0 = alloc_tmp( c ); - x0y1 = alloc_tmp( c ); - x1y0 = alloc_tmp( c ); - x1y1 = alloc_tmp( c ); - t = alloc_tmp( c ); - w0 = alloc_tmp( c ); - floors[ 0 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD ); - floors[ 1 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD ); - - for( i = 0; i < 4; i++ ) { - param[ i ] = lookup_tmp( c, mark - 5 + i ); - interp[ i ] = alloc_tmp( c ); - } - - for( i = 0; i < 8; i++ ) { - tmp[ i ] = alloc_tmp( c ); - itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD ); - wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 ); - } - - brw_set_access_mode( p, BRW_ALIGN_1 ); - - /* We only want 16 bits of precision from the integral part of each - co-ordinate, but unfortunately the RNDD semantics would saturate - at 16 bits if we performed the operation directly to a 16-bit - destination. Therefore, we round to 32-bit temporaries where - appropriate, and then store only the lower 16 bits. */ - brw_RNDD( p, retype( floors[ 0 ], BRW_REGISTER_TYPE_D ), param[ 0 ] ); - brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param[ 1 ] ); - brw_RNDD( p, retype( floors[ 1 ], BRW_REGISTER_TYPE_D ), param[ 2 ] ); - brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param[ 3 ] ); - brw_MOV( p, high_words( floors[ 0 ] ), low_words( itmp[ 0 ] ) ); - brw_MOV( p, high_words( floors[ 1 ] ), low_words( itmp[ 1 ] ) ); - - /* Modify the flag register here, because the side effect is useful - later (see below). We know for certain that all flags will be - cleared, since the FRC instruction cannot possibly generate - negative results. Even for exceptional inputs (infinities, denormals, - NaNs), the architecture guarantees that the L conditional is false. */ - brw_set_conditionalmod( p, BRW_CONDITIONAL_L ); - brw_FRC( p, param[ 0 ], param[ 0 ] ); - brw_set_predicate_control( p, BRW_PREDICATE_NONE ); - for( i = 1; i < 4; i++ ) - brw_FRC( p, param[ i ], param[ i ] ); - - /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first - of all. */ - for( i = 0; i < 4; i++ ) - brw_MUL( p, interp[ i ], param[ i ], brw_imm_f( 6.0 ) ); - for( i = 0; i < 4; i++ ) - brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( -15.0 ) ); - for( i = 0; i < 4; i++ ) - brw_MUL( p, interp[ i ], interp[ i ], param[ i ] ); - for( i = 0; i < 4; i++ ) - brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( 10.0 ) ); - for( j = 0; j < 3; j++ ) - for( i = 0; i < 4; i++ ) - brw_MUL( p, interp[ i ], interp[ i ], param[ i ] ); - - /* Mark the current address, as it will be a jump destination. The - following code will be executed twice: first, with the flag - register clear indicating the w=0 case, and second with flags - set for w=1. */ - loop = p->nr_insn; - - /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to - be hashed. Since we have only 16 bits of precision in the hash, we - must be careful about thorough mixing to maintain entropy as we - squash the input vector into a small scalar. */ - brw_MUL( p, brw_null_reg(), low_words( floors[ 0 ] ), - brw_imm_uw( 0xBC8F ) ); - brw_MAC( p, brw_null_reg(), high_words( floors[ 0 ] ), - brw_imm_uw( 0xD0BD ) ); - brw_MAC( p, brw_null_reg(), low_words( floors[ 1 ] ), - brw_imm_uw( 0x9B93 ) ); - brw_MAC( p, low_words( itmp[ 0 ] ), high_words( floors[ 1 ] ), - brw_imm_uw( 0xA359 ) ); - brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ), - brw_imm_uw( 0xBC8F ) ); - - /* Temporarily disable the execution mask while we work with ExecSize=16 - channels (the mask is set for ExecSize=8 and is probably incorrect). - Although this might cause execution of unwanted channels, the code - writes only to temporary registers and has no side effects, so - disabling the mask is harmless. */ - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) ); - brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) ); - brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) ); - - /* We're now ready to perform the hashing. The eight hashes are - interleaved for performance. The hash function used is - designed to rapidly achieve avalanche and require only 16x16 - bit multiplication, and 8-bit swizzles (which we get for - free). */ - for( i = 0; i < 4; i++ ) - brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) ); - for( i = 0; i < 4; i++ ) - brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), - odd_bytes( wtmp[ i ] ) ); - for( i = 0; i < 4; i++ ) - brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) ); - for( i = 0; i < 4; i++ ) - brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), - odd_bytes( wtmp[ i ] ) ); - brw_pop_insn_state( p ); - - /* Now we want to initialise the four rear gradients based on the - hashes. Format conversion from signed integer to float leaves - everything scaled too high by a factor of pow( 2, 15 ), but - we correct for that right at the end. */ - /* x component */ - brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) ); - brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); - brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); - brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) ); - brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); - brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, x1y0, x1y0, t ); - brw_MUL( p, x1y1, x1y1, t ); - brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) ); - brw_MUL( p, x0y0, x0y0, param[ 0 ] ); - brw_MUL( p, x0y1, x0y1, param[ 0 ] ); - - /* y component */ - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); - brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); - /* prepare t for the w component (used below): w the first time through - the loop; w - 1 the second time) */ - brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); - brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) ); - p->current->header.predicate_inverse = 1; - brw_MOV( p, t, param[ 3 ] ); - p->current->header.predicate_inverse = 0; - brw_set_predicate_control( p, BRW_PREDICATE_NONE ); - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] ); - - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - - /* z component */ - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); - brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 2 ] ); - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param[ 2 ] ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 2 ] ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param[ 2 ] ); - - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - - /* w component */ - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); - - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); - brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) ); - - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - - /* Here we interpolate in the y dimension... */ - brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); - brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); - brw_MUL( p, x0y1, x0y1, interp[ 1 ] ); - brw_MUL( p, x1y1, x1y1, interp[ 1 ] ); - brw_ADD( p, x0y0, x0y0, x0y1 ); - brw_ADD( p, x1y0, x1y0, x1y1 ); - - /* And now in x. Leave the result in tmp[ 0 ] (see below)... */ - brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); - brw_MUL( p, x1y0, x1y0, interp[ 0 ] ); - brw_ADD( p, tmp[ 0 ], x0y0, x1y0 ); - - /* Now do the same thing for the front four gradients... */ - /* x component */ - brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) ); - brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) ); - brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) ); - brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); - brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, x1y0, x1y0, t ); - brw_MUL( p, x1y1, x1y1, t ); - brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) ); - brw_MUL( p, x0y0, x0y0, param[ 0 ] ); - brw_MUL( p, x0y1, x0y1, param[ 0 ] ); - - /* y component */ - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); - brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); - brw_ADD( p, t, param[ 2 ], brw_imm_f( -1.0 ) ); - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] ); - - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - - /* z component */ - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); - brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); - /* prepare t for the w component (used below): w the first time through - the loop; w - 1 the second time) */ - brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); - brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) ); - p->current->header.predicate_inverse = 1; - brw_MOV( p, t, param[ 3 ] ); - p->current->header.predicate_inverse = 0; - brw_set_predicate_control( p, BRW_PREDICATE_NONE ); - - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - - /* w component */ - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); - - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); - - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - - /* Interpolate in the y dimension: */ - brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); - brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); - brw_MUL( p, x0y1, x0y1, interp[ 1 ] ); - brw_MUL( p, x1y1, x1y1, interp[ 1 ] ); - brw_ADD( p, x0y0, x0y0, x0y1 ); - brw_ADD( p, x1y0, x1y0, x1y1 ); - - /* And now in x. The rear face is in tmp[ 0 ] (see above), so this - time put the front face in tmp[ 1 ] and we're nearly there... */ - brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); - brw_MUL( p, x1y0, x1y0, interp[ 0 ] ); - brw_ADD( p, tmp[ 1 ], x0y0, x1y0 ); - - /* Another interpolation, in the z dimension: */ - brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) ); - brw_MUL( p, tmp[ 1 ], tmp[ 1 ], interp[ 2 ] ); - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] ); - - /* Exit the loop if we've computed both cubes... */ - origin = p->nr_insn; - brw_push_insn_state( p ); - brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_ADD( p, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) ); - brw_pop_insn_state( p ); - - /* Save the result for the w=0 case, and increment the w coordinate: */ - brw_MOV( p, w0, tmp[ 0 ] ); - brw_ADD( p, high_words( floors[ 1 ] ), high_words( floors[ 1 ] ), - brw_imm_uw( 1 ) ); - - /* Loop around for the other cube. Explicitly set the flag register - (unfortunately we must spend an extra instruction to do this: we - can't rely on a side effect of the previous MOV or ADD because - conditional modifiers which are normally true might be false in - exceptional circumstances, e.g. given a NaN input; the add to - brw_ip_reg() is not suitable because the IP is not an 8-vector). */ - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_MOV( p, brw_flag_reg(), brw_imm_uw( 0xFF ) ); - brw_ADD( p, brw_ip_reg(), brw_ip_reg(), - brw_imm_d( ( loop - p->nr_insn ) << 4 ) ); - brw_pop_insn_state( p ); - - /* Patch the previous conditional branch now that we know the - destination address. */ - brw_set_src1( p->store + origin, - brw_imm_d( ( p->nr_insn - origin ) << 4 ) ); - - /* The very last interpolation. */ - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], negate( w0 ) ); - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], interp[ 3 ] ); - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], w0 ); - - /* scale by pow( 2, -15 ), as described above */ - brw_MUL( p, param[ 0 ], tmp[ 0 ], brw_imm_f( 0.000030517578125 ) ); - - release_tmps( c, mark ); -} - -static void emit_noise4( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, src2, src3, param0, param1, param2, param3, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - int mark = mark_tmps( c ); - - assert( mark == 0 ); - - src0 = get_src_reg( c, inst, 0, 0 ); - src1 = get_src_reg( c, inst, 0, 1 ); - src2 = get_src_reg( c, inst, 0, 2 ); - src3 = get_src_reg( c, inst, 0, 3 ); - - param0 = alloc_tmp( c ); - param1 = alloc_tmp( c ); - param2 = alloc_tmp( c ); - param3 = alloc_tmp( c ); - - brw_MOV( p, param0, src0 ); - brw_MOV( p, param1, src1 ); - brw_MOV( p, param2, src2 ); - brw_MOV( p, param3, src3 ); - - invoke_subroutine( c, SUB_NOISE4, noise4_sub ); - - /* Fill in the result: */ - brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - brw_MOV( p, dst, param0 ); - } - } - if( inst->SaturateMode == SATURATE_ZERO_ONE ) - brw_set_saturate( p, 0 ); - - release_tmps( c, mark ); -} static void emit_wpos_xy(struct brw_wm_compile *c, const struct prog_instruction *inst) @@ -2543,19 +1507,18 @@ static void emit_wpos_xy(struct brw_wm_compile *c, * X and Y channels. */ if (mask & WRITEMASK_X) { - /* X' = X - origin_x */ - brw_ADD(p, + /* X' = X */ + brw_MOV(p, dst[0], - retype(src0[0], BRW_REGISTER_TYPE_W), - brw_imm_d(0 - c->key.origin_x)); + retype(src0[0], BRW_REGISTER_TYPE_W)); } if (mask & WRITEMASK_Y) { - /* Y' = height - (Y - origin_y) = height + origin_y - Y */ + /* Y' = height - 1 - Y */ brw_ADD(p, dst[1], negate(retype(src0[1], BRW_REGISTER_TYPE_W)), - brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); + brw_imm_d(c->key.drawable_height - 1)); } } @@ -2827,7 +1790,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_trunc(c, inst); break; case OPCODE_MOV: - case OPCODE_SWZ: emit_mov(c, inst); break; case OPCODE_DP3: @@ -2903,18 +1865,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_MAD: emit_mad(c, inst); break; - case OPCODE_NOISE1: - emit_noise1(c, inst); - break; - case OPCODE_NOISE2: - emit_noise2(c, inst); - break; - case OPCODE_NOISE3: - emit_noise3(c, inst); - break; - case OPCODE_NOISE4: - emit_noise4(c, inst); - break; case OPCODE_TEX: emit_tex(c, inst); break; diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c index 6279258..0c411b5 100644 --- a/src/gallium/drivers/i965/brw_wm_pass0.c +++ b/src/gallium/drivers/i965/brw_wm_pass0.c @@ -422,7 +422,6 @@ void brw_wm_pass0( struct brw_wm_compile *c ) */ switch (inst->Opcode) { case OPCODE_MOV: - case OPCODE_SWZ: if (!inst->SaturateMode) { pass0_precalc_mov(c, inst); } diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c index b449394..d940ec0 100644 --- a/src/gallium/drivers/i965/brw_wm_pass1.c +++ b/src/gallium/drivers/i965/brw_wm_pass1.c @@ -120,7 +120,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) GLuint writemask; GLuint read0, read1, read2; - if (inst->opcode == OPCODE_KIL) { + if (inst->opcode == TGSI_OPCODE_KIL) { track_arg(c, inst, 0, WRITEMASK_XYZW); /* All args contribute to final */ continue; } @@ -154,76 +154,75 @@ void brw_wm_pass1( struct brw_wm_compile *c ) /* Mark all inputs which contribute to the marked outputs: */ switch (inst->opcode) { - case OPCODE_ABS: - case OPCODE_FLR: - case OPCODE_FRC: - case OPCODE_MOV: - case OPCODE_SWZ: - case OPCODE_TRUNC: + case TGSI_OPCODE_ABS: + case TGSI_OPCODE_FLR: + case TGSI_OPCODE_FRC: + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_TRUNC: read0 = writemask; break; - case OPCODE_SUB: - case OPCODE_SLT: - case OPCODE_SLE: - case OPCODE_SGE: - case OPCODE_SGT: - case OPCODE_SEQ: - case OPCODE_SNE: - case OPCODE_ADD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MUL: + case TGSI_OPCODE_SUB: + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SGE: + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SNE: + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_MAX: + case TGSI_OPCODE_MIN: + case TGSI_OPCODE_MUL: read0 = writemask; read1 = writemask; break; - case OPCODE_DDX: - case OPCODE_DDY: + case TGSI_OPCODE_DDX: + case TGSI_OPCODE_DDY: read0 = writemask; break; - case OPCODE_MAD: - case OPCODE_CMP: - case OPCODE_LRP: + case TGSI_OPCODE_MAD: + case TGSI_OPCODE_CMP: + case TGSI_OPCODE_LRP: read0 = writemask; read1 = writemask; read2 = writemask; break; - case OPCODE_XPD: + case TGSI_OPCODE_XPD: if (writemask & WRITEMASK_X) read0 |= WRITEMASK_YZ; if (writemask & WRITEMASK_Y) read0 |= WRITEMASK_XZ; if (writemask & WRITEMASK_Z) read0 |= WRITEMASK_XY; read1 = read0; break; - case OPCODE_COS: - case OPCODE_EX2: - case OPCODE_LG2: - case OPCODE_RCP: - case OPCODE_RSQ: - case OPCODE_SIN: - case OPCODE_SCS: + case TGSI_OPCODE_COS: + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_LG2: + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_SIN: + case TGSI_OPCODE_SCS: case WM_CINTERP: case WM_PIXELXY: read0 = WRITEMASK_X; break; - case OPCODE_POW: + case TGSI_OPCODE_POW: read0 = WRITEMASK_X; read1 = WRITEMASK_X; break; - case OPCODE_TEX: - case OPCODE_TXP: + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXP: read0 = get_texcoord_mask(inst->tex_idx); if (inst->tex_shadow) read0 |= WRITEMASK_Z; break; - case OPCODE_TXB: + case TGSI_OPCODE_TXB: /* Shadow ignored for txb. */ read0 = get_texcoord_mask(inst->tex_idx) | WRITEMASK_W; @@ -254,28 +253,28 @@ void brw_wm_pass1( struct brw_wm_compile *c ) read2 = WRITEMASK_W; /* pixel w */ break; - case OPCODE_DP3: + case TGSI_OPCODE_DP3: read0 = WRITEMASK_XYZ; read1 = WRITEMASK_XYZ; break; - case OPCODE_DPH: + case TGSI_OPCODE_DPH: read0 = WRITEMASK_XYZ; read1 = WRITEMASK_XYZW; break; - case OPCODE_DP4: + case TGSI_OPCODE_DP4: read0 = WRITEMASK_XYZW; read1 = WRITEMASK_XYZW; break; - case OPCODE_LIT: + case TGSI_OPCODE_LIT: read0 = WRITEMASK_XYW; break; - case OPCODE_DST: + case TGSI_OPCODE_DST: case WM_FRONTFACING: - case OPCODE_KIL_NV: + case TGSI_OPCODE_KIL_NV: default: break; } diff --git a/src/gallium/drivers/i965/intel_chipset.h b/src/gallium/drivers/i965/intel_chipset.h index 3dc8653..3c38f16 100644 --- a/src/gallium/drivers/i965/intel_chipset.h +++ b/src/gallium/drivers/i965/intel_chipset.h @@ -66,7 +66,6 @@ #define PCI_CHIP_Q45_G 0x2E12 #define PCI_CHIP_G45_G 0x2E22 #define PCI_CHIP_G41_G 0x2E32 -#define PCI_CHIP_B43_G 0x2E42 #define PCI_CHIP_ILD_G 0x0042 #define PCI_CHIP_ILM_G 0x0046 @@ -84,8 +83,7 @@ #define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ devid == PCI_CHIP_Q45_G || \ devid == PCI_CHIP_G45_G || \ - devid == PCI_CHIP_G41_G || \ - devid == PCI_CHIP_B43_G) + devid == PCI_CHIP_G41_G) #define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM) #define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) |