diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965')
35 files changed, 778 insertions, 805 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index 412d82a..74a66af 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -238,10 +238,10 @@ static void upload_blend_constant_color(struct brw_context *brw) BEGIN_BATCH(5); OUT_BATCH(_3DSTATE_BLEND_CONSTANT_COLOR << 16 | (5-2)); - OUT_BATCH_F(ctx->Color.BlendColor[0]); - OUT_BATCH_F(ctx->Color.BlendColor[1]); - OUT_BATCH_F(ctx->Color.BlendColor[2]); - OUT_BATCH_F(ctx->Color.BlendColor[3]); + OUT_BATCH_F(ctx->Color.BlendColorUnclamped[0]); + OUT_BATCH_F(ctx->Color.BlendColorUnclamped[1]); + OUT_BATCH_F(ctx->Color.BlendColorUnclamped[2]); + OUT_BATCH_F(ctx->Color.BlendColorUnclamped[3]); CACHED_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 9483ec6..230d326 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -182,9 +182,21 @@ GLboolean brwCreateContext( int api, /* WM maximum threads is number of EUs times number of threads per EU. */ if (intel->gen >= 6) { - brw->urb.size = 1024; - brw->vs_max_threads = 60; - brw->wm_max_threads = 80; + if (IS_GT2(intel->intelScreen->deviceID)) { + /* This could possibly be 80, but is supposed to require + * disabling of WIZ hashing (bit 6 of GT_MODE, 0x20d0) and a + * GPU reset to change. + */ + brw->wm_max_threads = 40; + brw->vs_max_threads = 60; + brw->urb.size = 64; /* volume 5c.5 section 5.1 */ + brw->urb.max_vs_handles = 128; /* volume 2a (see 3DSTATE_URB) */ + } else { + brw->wm_max_threads = 40; + brw->vs_max_threads = 24; + brw->urb.size = 32; /* volume 5c.5 section 5.1 */ + brw->urb.max_vs_handles = 256; /* volume 2a (see 3DSTATE_URB) */ + } } else if (intel->gen == 5) { brw->urb.size = 1024; brw->vs_max_threads = 72; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 7b0551a..1daa49a 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -139,7 +139,7 @@ struct brw_context; * by any 3D rendering. */ #define BRW_NEW_BATCH 0x10000 -/** brw->depth_region updated */ +/** \see brw.state.depth_region */ #define BRW_NEW_DEPTH_BUFFER 0x20000 #define BRW_NEW_NR_WM_SURFACES 0x40000 #define BRW_NEW_NR_VS_SURFACES 0x80000 @@ -464,8 +464,27 @@ struct brw_context struct { struct brw_state_flags dirty; + /** + * \name Cached region pointers + * + * When the draw buffer is updated, often the depth buffer is not + * changed. Caching the pointer to the buffer's region allows us to + * detect when the buffer has in fact changed, and allows us to avoid + * updating the buffer's GPU state when it has not. + * + * The original of each cached pointer is an instance of + * \c intel_renderbuffer.region. + * + * \see brw_set_draw_region() + * + * \{ + */ + + /** \see struct brw_tracked_state brw_depthbuffer */ struct intel_region *depth_region; + /** \} */ + /** * List of buffers accumulated in brw_validate_state to receive * drm_intel_bo_check_aperture treatment before exec, so we can @@ -549,18 +568,21 @@ struct brw_context GLboolean constrained; + GLuint max_vs_handles; /* Maximum number of VS handles */ + GLuint max_gs_handles; /* Maximum number of GS handles */ + GLuint nr_vs_entries; GLuint nr_gs_entries; GLuint nr_clip_entries; GLuint nr_sf_entries; GLuint nr_cs_entries; - /* gen6 */ + /* gen6: + * The length of each URB entry owned by the VS (or GS), as + * a number of 1024-bit (128-byte) rows. Should be >= 1. + */ GLuint vs_size; -/* GLuint gs_size; */ -/* GLuint clip_size; */ -/* GLuint sf_size; */ -/* GLuint cs_size; */ + GLuint gs_size; GLuint vs_start; GLuint gs_start; @@ -639,7 +661,9 @@ struct brw_context drm_intel_bo *prog_bo; drm_intel_bo *state_bo; + uint32_t state_offset; drm_intel_bo *vp_bo; + uint32_t vp_offset; } sf; struct { diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 6c61aef..effcb6c 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -43,6 +43,12 @@ #define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00 #define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01 +#define CMD_3D_PRIM 0x7b00 /* 3DPRIMITIVE */ +/* DW0 */ +# define GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT 10 +# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15) +# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 15) + #define _3DPRIM_POINTLIST 0x01 #define _3DPRIM_LINELIST 0x02 #define _3DPRIM_LINESTRIP 0x03 @@ -65,9 +71,6 @@ #define _3DPRIM_LINESTRIP_CONT_BF 0x14 #define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 -#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0 -#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1 - #define BRW_ANISORATIO_2 0 #define BRW_ANISORATIO_4 1 #define BRW_ANISORATIO_6 2 @@ -147,6 +150,7 @@ #define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 #define BRW_DEPTHFORMAT_D32_FLOAT 1 #define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2 +#define BRW_DEPTHFORMAT_D24_UNORM_X8_UINT 3 /* GEN5 */ #define BRW_DEPTHFORMAT_D16_UNORM 5 #define BRW_FLOATING_POINT_IEEE_754 0 @@ -1131,8 +1135,6 @@ #define CMD_PIPE_CONTROL 0x7a00 -#define CMD_3D_PRIM 0x7b00 - #define CMD_MI_FLUSH 0x0200 diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index f5abe02..2db70c5 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -129,30 +129,31 @@ static void brw_emit_prim(struct brw_context *brw, const struct _mesa_prim *prim, uint32_t hw_prim) { - struct brw_3d_primitive prim_packet; struct intel_context *intel = &brw->intel; + int verts_per_instance; + int vertex_access_type; + int start_vertex_location; + int base_vertex_location; DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), prim->start, prim->count); - prim_packet.header.opcode = CMD_3D_PRIM; - prim_packet.header.length = sizeof(prim_packet)/4 - 2; - prim_packet.header.pad = 0; - prim_packet.header.topology = hw_prim; - prim_packet.header.indexed = prim->indexed; - - prim_packet.verts_per_instance = trim(prim->mode, prim->count); - prim_packet.start_vert_location = prim->start; - if (prim->indexed) - prim_packet.start_vert_location += brw->ib.start_vertex_offset; - else - prim_packet.start_vert_location += brw->vb.start_vertex_bias; - prim_packet.instance_count = 1; - prim_packet.start_instance_location = 0; - prim_packet.base_vert_location = prim->basevertex; - if (prim->indexed) - prim_packet.base_vert_location += brw->vb.start_vertex_bias; + start_vertex_location = prim->start; + base_vertex_location = prim->basevertex; + if (prim->indexed) { + vertex_access_type = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM; + start_vertex_location += brw->ib.start_vertex_offset; + base_vertex_location += brw->vb.start_vertex_bias; + } else { + vertex_access_type = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL; + start_vertex_location += brw->vb.start_vertex_bias; + } + + verts_per_instance = trim(prim->mode, prim->count); + /* If nothing to emit, just return. */ + if (verts_per_instance == 0) + return; /* If we're set to always flush, do it before and after the primitive emit. * We want to catch both missed flushes that hurt instruction/state cache @@ -162,10 +163,18 @@ static void brw_emit_prim(struct brw_context *brw, if (intel->always_flush_cache) { intel_batchbuffer_emit_mi_flush(intel); } - if (prim_packet.verts_per_instance) { - intel_batchbuffer_data(&brw->intel, &prim_packet, - sizeof(prim_packet), false); - } + + BEGIN_BATCH(6); + OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) | + hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT | + vertex_access_type); + OUT_BATCH(verts_per_instance); + OUT_BATCH(start_vertex_location); + OUT_BATCH(1); // instance count + OUT_BATCH(0); // start instance location + OUT_BATCH(base_vertex_location); + ADVANCE_BATCH(); + if (intel->always_flush_cache) { intel_batchbuffer_emit_mi_flush(intel); } @@ -271,20 +280,20 @@ static GLboolean check_fallbacks( struct brw_context *brw, struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u]; if (texUnit->Enabled) { if (texUnit->Enabled & TEXTURE_1D_BIT) { - if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) { + if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->Sampler.WrapS == GL_CLAMP) { return GL_TRUE; } } if (texUnit->Enabled & TEXTURE_2D_BIT) { - if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP || - texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) { + if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->Sampler.WrapS == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_2D_INDEX]->Sampler.WrapT == GL_CLAMP) { return GL_TRUE; } } if (texUnit->Enabled & TEXTURE_3D_BIT) { - if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP || - texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP || - texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) { + if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->Sampler.WrapS == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_3D_INDEX]->Sampler.WrapT == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_3D_INDEX]->Sampler.WrapR == GL_CLAMP) { return GL_TRUE; } } diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index f1d0069..9389eb6 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -575,7 +575,7 @@ static void brw_emit_vertices(struct brw_context *brw) if (intel->gen >= 5) { OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1); } else - OUT_BATCH(buffer->bo->size / buffer->stride); + OUT_BATCH(0); OUT_BATCH(0); /* Instance data step rate */ brw->vb.current_buffers[i].handle = buffer->bo->handle; diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index 3b5c4c0..7e63482 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -34,6 +34,28 @@ #include "brw_defines.h" #include "brw_eu.h" +/* Returns the corresponding conditional mod for swapping src0 and + * src1 in e.g. CMP. + */ +uint32_t +brw_swap_cmod(uint32_t cmod) +{ + switch (cmod) { + case BRW_CONDITIONAL_Z: + case BRW_CONDITIONAL_NZ: + return cmod; + case BRW_CONDITIONAL_G: + return BRW_CONDITIONAL_LE; + case BRW_CONDITIONAL_GE: + return BRW_CONDITIONAL_L; + case BRW_CONDITIONAL_L: + return BRW_CONDITIONAL_GE; + case BRW_CONDITIONAL_LE: + return BRW_CONDITIONAL_G; + default: + return ~0; + } +} /* How does predicate control work when execution_size != 8? Do I @@ -60,6 +82,11 @@ void brw_set_predicate_control( struct brw_compile *p, GLuint pc ) p->current->header.predicate_control = pc; } +void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse) +{ + p->current->header.predicate_inverse = predicate_inverse; +} + void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional ) { p->current->header.destreg__conditionalmod = conditional; diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 2d2ed9d..718b380 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -772,6 +772,7 @@ void brw_set_access_mode( struct brw_compile *p, GLuint access_mode ); void brw_set_compression_control( struct brw_compile *p, GLboolean control ); void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value ); void brw_set_predicate_control( struct brw_compile *p, GLuint pc ); +void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse); void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional ); void brw_set_acc_write_control(struct brw_compile *p, GLuint value); @@ -1017,6 +1018,8 @@ void brw_set_src1( struct brw_instruction *insn, void brw_set_uip_jip(struct brw_compile *p); +uint32_t brw_swap_cmod(uint32_t cmod); + /* brw_optimize.c */ void brw_optimize(struct brw_compile *p); void brw_remove_duplicate_mrf_moves(struct brw_compile *p); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 21ce92c..71485cd 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -52,6 +52,34 @@ static void guess_execution_size(struct brw_compile *p, } +/** + * Prior to Sandybridge, the SEND instruction accepted non-MRF source + * registers, implicitly moving the operand to a message register. + * + * On Sandybridge, this is no longer the case. This function performs the + * explicit move; it should be called before emitting a SEND instruction. + */ +static void +gen6_resolve_implied_move(struct brw_compile *p, + struct brw_reg *src, + GLuint msg_reg_nr) +{ + struct intel_context *intel = &p->brw->intel; + if (intel->gen != 6) + return; + + if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), + retype(*src, BRW_REGISTER_TYPE_UD)); + brw_pop_insn_state(p); + } + *src = brw_message_reg(msg_reg_nr); +} + + static void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, struct brw_reg dest) @@ -468,10 +496,9 @@ static void brw_set_dp_write_message( struct brw_context *brw, insn->bits3.dp_render_cache.response_length = response_length; insn->bits3.dp_render_cache.msg_length = msg_length; insn->bits3.dp_render_cache.end_of_thread = end_of_thread; + + /* We always use the render cache for write messages */ insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE; - /* XXX really need below? */ - insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; - insn->bits2.send_gen5.end_of_thread = end_of_thread; } else if (intel->gen == 5) { insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; insn->bits3.dp_write_gen5.msg_control = msg_control; @@ -511,6 +538,13 @@ brw_set_dp_read_message(struct brw_context *brw, brw_set_src1(insn, brw_imm_d(0)); if (intel->gen >= 6) { + uint32_t target_function; + + if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE) + target_function = BRW_MESSAGE_TARGET_DATAPORT_READ; /* data cache */ + else + target_function = BRW_MESSAGE_TARGET_DATAPORT_WRITE; /* render cache */ + insn->bits3.dp_render_cache.binding_table_index = binding_table_index; insn->bits3.dp_render_cache.msg_control = msg_control; insn->bits3.dp_render_cache.pixel_scoreboard_clear = 0; @@ -520,10 +554,7 @@ brw_set_dp_read_message(struct brw_context *brw, insn->bits3.dp_render_cache.response_length = response_length; insn->bits3.dp_render_cache.msg_length = msg_length; insn->bits3.dp_render_cache.end_of_thread = 0; - insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_READ; - /* XXX really need below? */ - insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; - insn->bits2.send_gen5.end_of_thread = 0; + insn->header.destreg__conditionalmod = target_function; } else if (intel->gen == 5) { insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; insn->bits3.dp_read_gen5.msg_control = msg_control; @@ -1458,9 +1489,12 @@ void brw_oword_block_write_scratch(struct brw_compile *p, GLuint offset) { struct intel_context *intel = &p->brw->intel; - uint32_t msg_control; + uint32_t msg_control, msg_type; int mlen; + if (intel->gen >= 6) + offset /= 16; + mrf = retype(mrf, BRW_REGISTER_TYPE_UD); if (num_regs == 1) { @@ -1526,13 +1560,22 @@ void brw_oword_block_write_scratch(struct brw_compile *p, } brw_set_dest(p, insn, dest); - brw_set_src0(insn, brw_null_reg()); + if (intel->gen >= 6) { + brw_set_src0(insn, mrf); + } else { + brw_set_src0(insn, brw_null_reg()); + } + + if (intel->gen >= 6) + msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; + else + msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; brw_set_dp_write_message(p->brw, insn, 255, /* binding table index (255=stateless) */ msg_control, - BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ + msg_type, mlen, GL_TRUE, /* header_present */ 0, /* pixel scoreboard */ @@ -1557,9 +1600,13 @@ brw_oword_block_read_scratch(struct brw_compile *p, int num_regs, GLuint offset) { + struct intel_context *intel = &p->brw->intel; uint32_t msg_control; int rlen; + if (intel->gen >= 6) + offset /= 16; + mrf = retype(mrf, BRW_REGISTER_TYPE_UD); dest = retype(dest, BRW_REGISTER_TYPE_UW); @@ -1596,14 +1643,18 @@ brw_oword_block_read_scratch(struct brw_compile *p, insn->header.destreg__conditionalmod = mrf.nr; brw_set_dest(p, insn, dest); /* UW? */ - brw_set_src0(insn, brw_null_reg()); + if (intel->gen >= 6) { + brw_set_src0(insn, mrf); + } else { + brw_set_src0(insn, brw_null_reg()); + } brw_set_dp_read_message(p->brw, insn, 255, /* binding table index (255=stateless) */ msg_control, BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ - 1, /* target cache (render/scratch) */ + BRW_DATAPORT_READ_TARGET_RENDER_CACHE, 1, /* msg_length */ rlen); } @@ -1771,6 +1822,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, GLuint bind_table_index) { struct intel_context *intel = &p->brw->intel; + struct brw_reg src = brw_vec8_grf(0, 0); int msg_type; /* Setup MRF[1] with offset into const buffer */ @@ -1787,6 +1839,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, addr_reg, brw_imm_d(offset)); brw_pop_insn_state(p); + gen6_resolve_implied_move(p, &src, 0); struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = BRW_PREDICATE_NONE; @@ -1795,7 +1848,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, insn->header.mask_control = BRW_MASK_DISABLE; brw_set_dest(p, insn, dest); - brw_set_src0(insn, brw_vec8_grf(0, 0)); + brw_set_src0(insn, src); if (intel->gen == 6) msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; @@ -1809,7 +1862,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, bind_table_index, BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, msg_type, - 0, /* source cache = data cache */ + BRW_DATAPORT_READ_TARGET_DATA_CACHE, 2, /* msg_length */ 1); /* response_length */ } @@ -1966,20 +2019,7 @@ void brw_SAMPLE(struct brw_compile *p, { struct brw_instruction *insn; - /* Sandybridge doesn't have the implied move for SENDs, - * and the first message register index comes from src0. - */ - if (intel->gen >= 6) { - if (src0.file != BRW_ARCHITECTURE_REGISTER_FILE || - src0.nr != BRW_ARF_NULL) { - brw_push_insn_state(p); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, retype(brw_message_reg(msg_reg_nr), src0.type), src0); - brw_pop_insn_state(p); - } - src0 = brw_message_reg(msg_reg_nr); - } + gen6_resolve_implied_move(p, &src0, msg_reg_nr); insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; /* XXX */ @@ -2034,17 +2074,7 @@ void brw_urb_WRITE(struct brw_compile *p, struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; - /* Sandybridge doesn't have the implied move for SENDs, - * and the first message register index comes from src0. - */ - if (intel->gen >= 6) { - brw_push_insn_state(p); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), - retype(src0, BRW_REGISTER_TYPE_UD)); - brw_pop_insn_state(p); - src0 = brw_message_reg(msg_reg_nr); - } + gen6_resolve_implied_move(p, &src0, msg_reg_nr); insn = next_insn(p, BRW_OPCODE_SEND); @@ -2154,17 +2184,7 @@ void brw_ff_sync(struct brw_compile *p, struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; - /* Sandybridge doesn't have the implied move for SENDs, - * and the first message register index comes from src0. - */ - if (intel->gen >= 6) { - brw_push_insn_state(p); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), - retype(src0, BRW_REGISTER_TYPE_UD)); - brw_pop_insn_state(p); - src0 = brw_message_reg(msg_reg_nr); - } + gen6_resolve_implied_move(p, &src0, msg_reg_nr); insn = next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, insn, dest); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 8b3f5ad..5426925 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -177,6 +177,23 @@ type_size(const struct glsl_type *type) } } +void +fs_visitor::fail(const char *format, ...) +{ + if (!failed) { + failed = true; + + if (INTEL_DEBUG & DEBUG_WM) { + fprintf(stderr, "FS compile failed: "); + + va_list va; + va_start(va, format); + vfprintf(stderr, format, va); + va_end(va); + } + } +} + /** * Returns how many MRFs an FS opcode will write over. * @@ -382,60 +399,32 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type) void fs_visitor::setup_builtin_uniform_values(ir_variable *ir) { - const struct gl_builtin_uniform_desc *statevar = NULL; - - for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) { - statevar = &_mesa_builtin_uniform_desc[i]; - if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0) - break; - } - - if (!statevar->name) { - this->fail = true; - printf("Failed to find builtin uniform `%s'\n", ir->name); - return; - } - - int array_count; - if (ir->type->is_array()) { - array_count = ir->type->length; - } else { - array_count = 1; - } + const ir_state_slot *const slots = ir->state_slots; + assert(ir->state_slots != NULL); - for (int a = 0; a < array_count; a++) { - for (unsigned int i = 0; i < statevar->num_elements; i++) { - struct gl_builtin_uniform_element *element = &statevar->elements[i]; - int tokens[STATE_LENGTH]; - - memcpy(tokens, element->tokens, sizeof(element->tokens)); - if (ir->type->is_array()) { - tokens[1] = a; - } - - /* This state reference has already been setup by ir_to_mesa, - * but we'll get the same index back here. - */ - int index = _mesa_add_state_reference(this->fp->Base.Parameters, - (gl_state_index *)tokens); + for (unsigned int i = 0; i < ir->num_state_slots; i++) { + /* This state reference has already been setup by ir_to_mesa, but we'll + * get the same index back here. + */ + int index = _mesa_add_state_reference(this->fp->Base.Parameters, + (gl_state_index *)slots[i].tokens); - /* Add each of the unique swizzles of the element as a - * parameter. This'll end up matching the expected layout of - * the array/matrix/structure we're trying to fill in. - */ - int last_swiz = -1; - for (unsigned int i = 0; i < 4; i++) { - int swiz = GET_SWZ(element->swizzle, i); - if (swiz == last_swiz) - break; - last_swiz = swiz; + /* Add each of the unique swizzles of the element as a parameter. + * This'll end up matching the expected layout of the + * array/matrix/structure we're trying to fill in. + */ + int last_swiz = -1; + for (unsigned int j = 0; j < 4; j++) { + int swiz = GET_SWZ(slots[i].swizzle, j); + if (swiz == last_swiz) + break; + last_swiz = swiz; - c->prog_data.param_convert[c->prog_data.nr_params] = - PARAM_NO_CONVERT; - this->param_index[c->prog_data.nr_params] = index; - this->param_offset[c->prog_data.nr_params] = swiz; - c->prog_data.nr_params++; - } + c->prog_data.param_convert[c->prog_data.nr_params] = + PARAM_NO_CONVERT; + this->param_index[c->prog_data.nr_params] = index; + this->param_offset[c->prog_data.nr_params] = swiz; + c->prog_data.nr_params++; } } } @@ -451,15 +440,15 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) /* gl_FragCoord.x */ if (ir->pixel_center_integer) { - emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); + emit(BRW_OPCODE_MOV, wpos, this->pixel_x); } else { - emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f))); + emit(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f)); } wpos.reg_offset++; /* gl_FragCoord.y */ if (!flip && ir->pixel_center_integer) { - emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); + emit(BRW_OPCODE_MOV, wpos, this->pixel_y); } else { fs_reg pixel_y = this->pixel_y; float offset = (ir->pixel_center_integer ? 0.0 : 0.5); @@ -469,22 +458,22 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) offset += c->key.drawable_height - 1.0; } - emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset))); + emit(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset)); } wpos.reg_offset++; /* gl_FragCoord.z */ if (intel->gen >= 6) { - emit(fs_inst(BRW_OPCODE_MOV, wpos, - fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); + emit(BRW_OPCODE_MOV, wpos, + fs_reg(brw_vec8_grf(c->source_depth_reg, 0))); } else { - emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, - interp_reg(FRAG_ATTRIB_WPOS, 2))); + emit(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, + interp_reg(FRAG_ATTRIB_WPOS, 2)); } wpos.reg_offset++; /* gl_FragCoord.w: Already set up in emit_interpolation */ - emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w)); + emit(BRW_OPCODE_MOV, wpos, this->wpos_w); return reg; } @@ -503,7 +492,7 @@ fs_visitor::emit_general_interpolation(ir_variable *ir) if (ir->type->is_array()) { array_elements = ir->type->length; if (array_elements == 0) { - this->fail = true; + fail("dereferenced array '%s' has length 0\n", ir->name); } type = ir->type->fields.array; } else { @@ -523,37 +512,33 @@ fs_visitor::emit_general_interpolation(ir_variable *ir) continue; } - if (c->key.flat_shade && (location == FRAG_ATTRIB_COL0 || - location == FRAG_ATTRIB_COL1)) { + bool is_gl_Color = + location == FRAG_ATTRIB_COL0 || location == FRAG_ATTRIB_COL1; + + if (c->key.flat_shade && is_gl_Color) { /* Constant interpolation (flat shading) case. The SF has * handed us defined values in only the constant offset * field of the setup reg. */ - for (unsigned int c = 0; c < type->vector_elements; c++) { - struct brw_reg interp = interp_reg(location, c); + for (unsigned int k = 0; k < type->vector_elements; k++) { + struct brw_reg interp = interp_reg(location, k); interp = suboffset(interp, 3); - emit(fs_inst(FS_OPCODE_CINTERP, attr, fs_reg(interp))); + emit(FS_OPCODE_CINTERP, attr, fs_reg(interp)); attr.reg_offset++; } } else { /* Perspective interpolation case. */ - for (unsigned int c = 0; c < type->vector_elements; c++) { - struct brw_reg interp = interp_reg(location, c); - emit(fs_inst(FS_OPCODE_LINTERP, - attr, - this->delta_x, - this->delta_y, - fs_reg(interp))); + for (unsigned int k = 0; k < type->vector_elements; k++) { + struct brw_reg interp = interp_reg(location, k); + emit(FS_OPCODE_LINTERP, attr, + this->delta_x, this->delta_y, fs_reg(interp)); attr.reg_offset++; } - if (intel->gen < 6) { + if (intel->gen < 6 && !(is_gl_Color && c->key.linear_color)) { attr.reg_offset -= type->vector_elements; - for (unsigned int c = 0; c < type->vector_elements; c++) { - emit(fs_inst(BRW_OPCODE_MUL, - attr, - attr, - this->pixel_w)); + for (unsigned int k = 0; k < type->vector_elements; k++) { + emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w); attr.reg_offset++; } } @@ -572,28 +557,21 @@ fs_visitor::emit_frontfacing_interpolation(ir_variable *ir) /* The frontfacing comes in as a bit in the thread payload. */ if (intel->gen >= 6) { - emit(fs_inst(BRW_OPCODE_ASR, - *reg, - fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)), - fs_reg(15))); - emit(fs_inst(BRW_OPCODE_NOT, - *reg, - *reg)); - emit(fs_inst(BRW_OPCODE_AND, - *reg, - *reg, - fs_reg(1))); + emit(BRW_OPCODE_ASR, *reg, + fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)), + fs_reg(15)); + emit(BRW_OPCODE_NOT, *reg, *reg); + emit(BRW_OPCODE_AND, *reg, *reg, fs_reg(1)); } else { struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); /* bit 31 is "primitive is back face", so checking < (1 << 31) gives * us front face */ - fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, - *reg, - fs_reg(r1_6ud), - fs_reg(1u << 31))); + fs_inst *inst = emit(BRW_OPCODE_CMP, *reg, + fs_reg(r1_6ud), + fs_reg(1u << 31)); inst->conditional_mod = BRW_CONDITIONAL_L; - emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u))); + emit(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)); } return reg; @@ -628,11 +606,11 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) src.abs || src.negate)) { fs_reg expanded = fs_reg(this, glsl_type::float_type); - emit(fs_inst(BRW_OPCODE_MOV, expanded, src)); + emit(BRW_OPCODE_MOV, expanded, src); src = expanded; } - fs_inst *inst = emit(fs_inst(opcode, dst, src)); + fs_inst *inst = emit(opcode, dst, src); if (intel->gen < 6) { inst->base_mrf = 2; @@ -658,20 +636,20 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1) */ if (src0.file == UNIFORM || src0.abs || src0.negate) { fs_reg expanded = fs_reg(this, glsl_type::float_type); - emit(fs_inst(BRW_OPCODE_MOV, expanded, src0)); + emit(BRW_OPCODE_MOV, expanded, src0); src0 = expanded; } if (src1.file == UNIFORM || src1.abs || src1.negate) { fs_reg expanded = fs_reg(this, glsl_type::float_type); - emit(fs_inst(BRW_OPCODE_MOV, expanded, src1)); + emit(BRW_OPCODE_MOV, expanded, src1); src1 = expanded; } - inst = emit(fs_inst(opcode, dst, src0, src1)); + inst = emit(opcode, dst, src0, src1); } else { - emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1)); - inst = emit(fs_inst(opcode, dst, src0, reg_null_f)); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1); + inst = emit(opcode, dst, src0, reg_null_f); inst->base_mrf = base_mrf; inst->mlen = 2; @@ -788,7 +766,7 @@ fs_visitor::try_emit_saturate(ir_expression *ir) fs_reg src = this->result; this->result = fs_reg(this, ir->type); - fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, src)); + fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src); inst->saturate = true; return true; @@ -834,9 +812,8 @@ fs_visitor::visit(ir_expression *ir) ir->operands[operand]->accept(this); if (this->result.file == BAD_FILE) { ir_print_visitor v; - printf("Failed to get tree for expression operand:\n"); + fail("Failed to get tree for expression operand:\n"); ir->operands[operand]->accept(&v); - this->fail = true; } op[operand] = this->result; @@ -859,7 +836,7 @@ fs_visitor::visit(ir_expression *ir) /* Note that BRW_OPCODE_NOT is not appropriate here, since it is * ones complement of the whole register, not just bit 0. */ - emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1))); + emit(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1)); break; case ir_unop_neg: op[0].negate = !op[0].negate; @@ -873,16 +850,16 @@ fs_visitor::visit(ir_expression *ir) case ir_unop_sign: temp = fs_reg(this, ir->type); - emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f))); + emit(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)); - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f))); + inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f)); inst->conditional_mod = BRW_CONDITIONAL_G; - inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f))); + inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)); inst->predicated = true; - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f))); + inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f)); inst->conditional_mod = BRW_CONDITIONAL_L; - inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f))); + inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)); inst->predicated = true; break; @@ -910,21 +887,21 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_dFdx: - emit(fs_inst(FS_OPCODE_DDX, this->result, op[0])); + emit(FS_OPCODE_DDX, this->result, op[0]); break; case ir_unop_dFdy: - emit(fs_inst(FS_OPCODE_DDY, this->result, op[0])); + emit(FS_OPCODE_DDY, this->result, op[0]); break; case ir_binop_add: - emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1])); + emit(BRW_OPCODE_ADD, this->result, op[0], op[1]); break; case ir_binop_sub: assert(!"not reached: should be handled by ir_sub_to_add_neg"); break; case ir_binop_mul: - emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1])); + emit(BRW_OPCODE_MUL, this->result, op[0], op[1]); break; case ir_binop_div: assert(!"not reached: should be handled by ir_div_to_mul_rcp"); @@ -946,21 +923,21 @@ fs_visitor::visit(ir_expression *ir) if (intel->gen < 5) temp.type = op[0].type; - inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], op[1])); + inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); inst->conditional_mod = brw_conditional_for_comparison(ir->operation); - emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); + emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)); break; case ir_binop_logic_xor: - emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); + emit(BRW_OPCODE_XOR, this->result, op[0], op[1]); break; case ir_binop_logic_or: - emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); + emit(BRW_OPCODE_OR, this->result, op[0], op[1]); break; case ir_binop_logic_and: - emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); + emit(BRW_OPCODE_AND, this->result, op[0], op[1]); break; case ir_binop_dot: @@ -988,7 +965,7 @@ fs_visitor::visit(ir_expression *ir) case ir_unop_b2f: case ir_unop_b2i: case ir_unop_f2i: - emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0])); + emit(BRW_OPCODE_MOV, this->result, op[0]); break; case ir_unop_f2b: case ir_unop_i2b: @@ -997,42 +974,41 @@ fs_visitor::visit(ir_expression *ir) if (intel->gen < 5) temp.type = op[0].type; - inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f))); + inst = emit(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f)); inst->conditional_mod = BRW_CONDITIONAL_NZ; - inst = emit(fs_inst(BRW_OPCODE_AND, this->result, - this->result, fs_reg(1))); + inst = emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(1)); break; case ir_unop_trunc: - emit(fs_inst(BRW_OPCODE_RNDZ, this->result, op[0])); + emit(BRW_OPCODE_RNDZ, this->result, op[0]); break; case ir_unop_ceil: op[0].negate = !op[0].negate; - inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); + inst = emit(BRW_OPCODE_RNDD, this->result, op[0]); this->result.negate = true; break; case ir_unop_floor: - inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0])); + inst = emit(BRW_OPCODE_RNDD, this->result, op[0]); break; case ir_unop_fract: - inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0])); + inst = emit(BRW_OPCODE_FRC, this->result, op[0]); break; case ir_unop_round_even: - emit(fs_inst(BRW_OPCODE_RNDE, this->result, op[0])); + emit(BRW_OPCODE_RNDE, this->result, op[0]); break; case ir_binop_min: - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); + inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]); inst->conditional_mod = BRW_CONDITIONAL_L; - inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); + inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); inst->predicated = true; break; case ir_binop_max: - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); + inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]); inst->conditional_mod = BRW_CONDITIONAL_G; - inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1])); + inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); inst->predicated = true; break; @@ -1041,16 +1017,16 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_bit_not: - inst = emit(fs_inst(BRW_OPCODE_NOT, this->result, op[0])); + inst = emit(BRW_OPCODE_NOT, this->result, op[0]); break; case ir_binop_bit_and: - inst = emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); + inst = emit(BRW_OPCODE_AND, this->result, op[0], op[1]); break; case ir_binop_bit_xor: - inst = emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); + inst = emit(BRW_OPCODE_XOR, this->result, op[0], op[1]); break; case ir_binop_bit_or: - inst = emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); + inst = emit(BRW_OPCODE_OR, this->result, op[0], op[1]); break; case ir_unop_u2f: @@ -1074,7 +1050,7 @@ fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, l.type = brw_type_for_base_type(type); r.type = brw_type_for_base_type(type); - fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); + fs_inst *inst = emit(BRW_OPCODE_MOV, l, r); inst->predicated = predicated; l.reg_offset++; @@ -1127,7 +1103,7 @@ fs_visitor::visit(ir_assignment *ir) ir->lhs->type->is_vector()) { for (int i = 0; i < ir->lhs->type->vector_elements; i++) { if (ir->write_mask & (1 << i)) { - inst = emit(fs_inst(BRW_OPCODE_MOV, l, r)); + inst = emit(BRW_OPCODE_MOV, l, r); if (ir->condition) inst->predicated = true; r.reg_offset++; @@ -1152,8 +1128,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) if (ir->shadow_comparitor) { for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), - coordinate)); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); coordinate.reg_offset++; } /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ @@ -1163,29 +1138,25 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) /* There's no plain shadow compare message, so we use shadow * compare with a bias of 0.0. */ - emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), - fs_reg(0.0f))); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f)); mlen++; } else if (ir->op == ir_txb) { ir->lod_info.bias->accept(this); - emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), - this->result)); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen++; } else { assert(ir->op == ir_txl); ir->lod_info.lod->accept(this); - emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), - this->result)); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen++; } ir->shadow_comparitor->accept(this); - emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen++; } else if (ir->op == ir_tex) { for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), - coordinate)); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); coordinate.reg_offset++; } /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ @@ -1199,8 +1170,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) assert(ir->op == ir_txb || ir->op == ir_txl); for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), - coordinate)); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), coordinate); coordinate.reg_offset++; } @@ -1209,13 +1179,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) if (ir->op == ir_txb) { ir->lod_info.bias->accept(this); - emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), - this->result)); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen++; } else { ir->lod_info.lod->accept(this); - emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), - this->result)); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen++; } @@ -1236,16 +1204,16 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) fs_inst *inst = NULL; switch (ir->op) { case ir_tex: - inst = emit(fs_inst(FS_OPCODE_TEX, dst)); + inst = emit(FS_OPCODE_TEX, dst); break; case ir_txb: - inst = emit(fs_inst(FS_OPCODE_TXB, dst)); + inst = emit(FS_OPCODE_TXB, dst); break; case ir_txl: - inst = emit(fs_inst(FS_OPCODE_TXL, dst)); + inst = emit(FS_OPCODE_TXL, dst); break; case ir_txd: - inst = emit(fs_inst(FS_OPCODE_TXD, dst)); + inst = emit(FS_OPCODE_TXD, dst); break; case ir_txf: assert(!"GLSL 1.30 features unsupported"); @@ -1256,7 +1224,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) if (simd16) { for (int i = 0; i < 4; i++) { - emit(fs_inst(BRW_OPCODE_MOV, orig_dst, dst)); + emit(BRW_OPCODE_MOV, orig_dst, dst); orig_dst.reg_offset++; dst.reg_offset += 2; } @@ -1280,8 +1248,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate) int base_mrf = 1; for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), - coordinate)); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate); coordinate.reg_offset++; } mlen += ir->coordinate->type->vector_elements; @@ -1290,30 +1257,30 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate) mlen = MAX2(mlen, 5); ir->shadow_comparitor->accept(this); - emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen++; } fs_inst *inst = NULL; switch (ir->op) { case ir_tex: - inst = emit(fs_inst(FS_OPCODE_TEX, dst)); + inst = emit(FS_OPCODE_TEX, dst); break; case ir_txb: ir->lod_info.bias->accept(this); mlen = MAX2(mlen, 5); - emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen++; - inst = emit(fs_inst(FS_OPCODE_TXB, dst)); + inst = emit(FS_OPCODE_TXB, dst); break; case ir_txl: ir->lod_info.lod->accept(this); mlen = MAX2(mlen, 5); - emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result)); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen++; - inst = emit(fs_inst(FS_OPCODE_TXL, dst)); + inst = emit(FS_OPCODE_TXL, dst); break; case ir_txd: case ir_txf: @@ -1356,14 +1323,14 @@ fs_visitor::visit(ir_texture *ir) } /* Explicitly set up the message header by copying g0 to msg reg m1. */ - emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, 1, BRW_REGISTER_TYPE_UD), - fs_reg(GRF, 0, BRW_REGISTER_TYPE_UD))); + emit(BRW_OPCODE_MOV, fs_reg(MRF, 1, BRW_REGISTER_TYPE_UD), + fs_reg(GRF, 0, BRW_REGISTER_TYPE_UD)); /* Then set the offset bits in DWord 2 of the message header. */ - emit(fs_inst(BRW_OPCODE_MOV, - fs_reg(retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1, 2), - BRW_REGISTER_TYPE_UD)), - fs_reg(brw_imm_uw(offset_bits)))); + emit(BRW_OPCODE_MOV, + fs_reg(retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1, 2), + BRW_REGISTER_TYPE_UD)), + fs_reg(brw_imm_uw(offset_bits))); } /* Should be lowered by do_lower_texture_projection */ @@ -1409,10 +1376,10 @@ fs_visitor::visit(ir_texture *ir) fs_reg src = coordinate; coordinate = dst; - emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_x)); + emit(BRW_OPCODE_MUL, dst, src, scale_x); dst.reg_offset++; src.reg_offset++; - emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_y)); + emit(BRW_OPCODE_MUL, dst, src, scale_y); } /* Writemasking doesn't eliminate channels on SIMD8 texture @@ -1453,13 +1420,13 @@ fs_visitor::visit(ir_texture *ir) l.reg_offset += i; if (swiz == SWIZZLE_ZERO) { - emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(0.0f))); + emit(BRW_OPCODE_MOV, l, fs_reg(0.0f)); } else if (swiz == SWIZZLE_ONE) { - emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(1.0f))); + emit(BRW_OPCODE_MOV, l, fs_reg(1.0f)); } else { fs_reg r = dst; r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i); - emit(fs_inst(BRW_OPCODE_MOV, l, r)); + emit(BRW_OPCODE_MOV, l, r); } } this->result = swizzle_dst; @@ -1500,7 +1467,7 @@ fs_visitor::visit(ir_swizzle *ir) } channel.reg_offset += swiz; - emit(fs_inst(BRW_OPCODE_MOV, result, channel)); + emit(BRW_OPCODE_MOV, result, channel); result.reg_offset++; } } @@ -1512,8 +1479,8 @@ fs_visitor::visit(ir_discard *ir) assert(ir->condition == NULL); /* FINISHME */ - emit(fs_inst(FS_OPCODE_DISCARD_NOT, temp, reg_null_d)); - emit(fs_inst(FS_OPCODE_DISCARD_AND, reg_null_d, temp)); + emit(FS_OPCODE_DISCARD_NOT, temp, reg_null_d); + emit(FS_OPCODE_DISCARD_AND, reg_null_d, temp); kill_emitted = true; } @@ -1539,7 +1506,7 @@ fs_visitor::visit(ir_constant *ir) dst_reg.type = src_reg.type; for (unsigned j = 0; j < size; j++) { - emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg)); + emit(BRW_OPCODE_MOV, dst_reg, src_reg); src_reg.reg_offset++; dst_reg.reg_offset++; } @@ -1554,7 +1521,7 @@ fs_visitor::visit(ir_constant *ir) dst_reg.type = src_reg.type; for (unsigned j = 0; j < size; j++) { - emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg)); + emit(BRW_OPCODE_MOV, dst_reg, src_reg); src_reg.reg_offset++; dst_reg.reg_offset++; } @@ -1565,16 +1532,16 @@ fs_visitor::visit(ir_constant *ir) for (unsigned i = 0; i < size; i++) { switch (ir->type->base_type) { case GLSL_TYPE_FLOAT: - emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i]))); + emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i])); break; case GLSL_TYPE_UINT: - emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i]))); + emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i])); break; case GLSL_TYPE_INT: - emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i]))); + emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i])); break; case GLSL_TYPE_BOOL: - emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i]))); + emit(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i])); break; default: assert(!"Non-float/uint/int/bool constant"); @@ -1605,40 +1572,39 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) switch (expr->operation) { case ir_unop_logic_not: - inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1))); + inst = emit(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1)); inst->conditional_mod = BRW_CONDITIONAL_Z; break; case ir_binop_logic_xor: - inst = emit(fs_inst(BRW_OPCODE_XOR, reg_null_d, op[0], op[1])); + inst = emit(BRW_OPCODE_XOR, reg_null_d, op[0], op[1]); inst->conditional_mod = BRW_CONDITIONAL_NZ; break; case ir_binop_logic_or: - inst = emit(fs_inst(BRW_OPCODE_OR, reg_null_d, op[0], op[1])); + inst = emit(BRW_OPCODE_OR, reg_null_d, op[0], op[1]); inst->conditional_mod = BRW_CONDITIONAL_NZ; break; case ir_binop_logic_and: - inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, op[0], op[1])); + inst = emit(BRW_OPCODE_AND, reg_null_d, op[0], op[1]); inst->conditional_mod = BRW_CONDITIONAL_NZ; break; case ir_unop_f2b: if (intel->gen >= 6) { - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, - op[0], fs_reg(0.0f))); + inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0.0f)); } else { - inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_f, op[0])); + inst = emit(BRW_OPCODE_MOV, reg_null_f, op[0]); } inst->conditional_mod = BRW_CONDITIONAL_NZ; break; case ir_unop_i2b: if (intel->gen >= 6) { - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0))); + inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0)); } else { - inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, op[0])); + inst = emit(BRW_OPCODE_MOV, reg_null_d, op[0]); } inst->conditional_mod = BRW_CONDITIONAL_NZ; break; @@ -1651,14 +1617,14 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) case ir_binop_all_equal: case ir_binop_nequal: case ir_binop_any_nequal: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1])); + inst = emit(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1]); inst->conditional_mod = brw_conditional_for_comparison(expr->operation); break; default: assert(!"not reached"); - this->fail = true; + fail("bad cond code\n"); break; } return; @@ -1667,11 +1633,10 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) ir->accept(this); if (intel->gen >= 6) { - fs_inst *inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, - this->result, fs_reg(1))); + fs_inst *inst = emit(BRW_OPCODE_AND, reg_null_d, this->result, fs_reg(1)); inst->conditional_mod = BRW_CONDITIONAL_NZ; } else { - fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, this->result)); + fs_inst *inst = emit(BRW_OPCODE_MOV, reg_null_d, this->result); inst->conditional_mod = BRW_CONDITIONAL_NZ; } } @@ -1700,36 +1665,36 @@ fs_visitor::emit_if_gen6(ir_if *ir) switch (expr->operation) { case ir_unop_logic_not: - inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(0))); + inst = emit(BRW_OPCODE_IF, temp, op[0], fs_reg(0)); inst->conditional_mod = BRW_CONDITIONAL_Z; return; case ir_binop_logic_xor: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); + inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]); inst->conditional_mod = BRW_CONDITIONAL_NZ; return; case ir_binop_logic_or: temp = fs_reg(this, glsl_type::bool_type); - emit(fs_inst(BRW_OPCODE_OR, temp, op[0], op[1])); - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0))); + emit(BRW_OPCODE_OR, temp, op[0], op[1]); + inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0)); inst->conditional_mod = BRW_CONDITIONAL_NZ; return; case ir_binop_logic_and: temp = fs_reg(this, glsl_type::bool_type); - emit(fs_inst(BRW_OPCODE_AND, temp, op[0], op[1])); - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0))); + emit(BRW_OPCODE_AND, temp, op[0], op[1]); + inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0)); inst->conditional_mod = BRW_CONDITIONAL_NZ; return; case ir_unop_f2b: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0))); + inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0)); inst->conditional_mod = BRW_CONDITIONAL_NZ; return; case ir_unop_i2b: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0))); + inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0)); inst->conditional_mod = BRW_CONDITIONAL_NZ; return; @@ -1741,15 +1706,15 @@ fs_visitor::emit_if_gen6(ir_if *ir) case ir_binop_all_equal: case ir_binop_nequal: case ir_binop_any_nequal: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); + inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]); inst->conditional_mod = brw_conditional_for_comparison(expr->operation); return; default: assert(!"not reached"); - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0))); + inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0)); inst->conditional_mod = BRW_CONDITIONAL_NZ; - this->fail = true; + fail("bad condition\n"); return; } return; @@ -1757,7 +1722,7 @@ fs_visitor::emit_if_gen6(ir_if *ir) ir->condition->accept(this); - fs_inst *inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0))); + fs_inst *inst = emit(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0)); inst->conditional_mod = BRW_CONDITIONAL_NZ; } @@ -1776,7 +1741,7 @@ fs_visitor::visit(ir_if *ir) } else { emit_bool_to_cond_code(ir->condition); - inst = emit(fs_inst(BRW_OPCODE_IF)); + inst = emit(BRW_OPCODE_IF); inst->predicated = true; } @@ -1788,7 +1753,7 @@ fs_visitor::visit(ir_if *ir) } if (!ir->else_instructions.is_empty()) { - emit(fs_inst(BRW_OPCODE_ELSE)); + emit(BRW_OPCODE_ELSE); foreach_iter(exec_list_iterator, iter, ir->else_instructions) { ir_instruction *ir = (ir_instruction *)iter.get(); @@ -1798,7 +1763,7 @@ fs_visitor::visit(ir_if *ir) } } - emit(fs_inst(BRW_OPCODE_ENDIF)); + emit(BRW_OPCODE_ENDIF); } void @@ -1815,21 +1780,20 @@ fs_visitor::visit(ir_loop *ir) this->base_ir = ir->from; ir->from->accept(this); - emit(fs_inst(BRW_OPCODE_MOV, counter, this->result)); + emit(BRW_OPCODE_MOV, counter, this->result); } } - emit(fs_inst(BRW_OPCODE_DO)); + emit(BRW_OPCODE_DO); if (ir->to) { this->base_ir = ir->to; ir->to->accept(this); - fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp, - counter, this->result)); + fs_inst *inst = emit(BRW_OPCODE_CMP, reg_null_cmp, counter, this->result); inst->conditional_mod = brw_conditional_for_comparison(ir->cmp); - inst = emit(fs_inst(BRW_OPCODE_BREAK)); + inst = emit(BRW_OPCODE_BREAK); inst->predicated = true; } @@ -1843,10 +1807,10 @@ fs_visitor::visit(ir_loop *ir) if (ir->increment) { this->base_ir = ir->increment; ir->increment->accept(this); - emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result)); + emit(BRW_OPCODE_ADD, counter, counter, this->result); } - emit(fs_inst(BRW_OPCODE_WHILE)); + emit(BRW_OPCODE_WHILE); } void @@ -1854,10 +1818,10 @@ fs_visitor::visit(ir_loop_jump *ir) { switch (ir->mode) { case ir_loop_jump::jump_break: - emit(fs_inst(BRW_OPCODE_BREAK)); + emit(BRW_OPCODE_BREAK); break; case ir_loop_jump::jump_continue: - emit(fs_inst(BRW_OPCODE_CONTINUE)); + emit(BRW_OPCODE_CONTINUE); break; } } @@ -1923,23 +1887,13 @@ void fs_visitor::emit_dummy_fs() { /* Everyone's favorite color. */ - emit(fs_inst(BRW_OPCODE_MOV, - fs_reg(MRF, 2), - fs_reg(1.0f))); - emit(fs_inst(BRW_OPCODE_MOV, - fs_reg(MRF, 3), - fs_reg(0.0f))); - emit(fs_inst(BRW_OPCODE_MOV, - fs_reg(MRF, 4), - fs_reg(1.0f))); - emit(fs_inst(BRW_OPCODE_MOV, - fs_reg(MRF, 5), - fs_reg(0.0f))); + emit(BRW_OPCODE_MOV, fs_reg(MRF, 2), fs_reg(1.0f)); + emit(BRW_OPCODE_MOV, fs_reg(MRF, 3), fs_reg(0.0f)); + emit(BRW_OPCODE_MOV, fs_reg(MRF, 4), fs_reg(1.0f)); + emit(BRW_OPCODE_MOV, fs_reg(MRF, 5), fs_reg(0.0f)); fs_inst *write; - write = emit(fs_inst(FS_OPCODE_FB_WRITE, - fs_reg(0), - fs_reg(0))); + write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0)); write->base_mrf = 0; } @@ -1969,14 +1923,14 @@ fs_visitor::emit_interpolation_setup_gen4() this->pixel_y = fs_reg(this, glsl_type::uint_type); this->pixel_x.type = BRW_REGISTER_TYPE_UW; this->pixel_y.type = BRW_REGISTER_TYPE_UW; - emit(fs_inst(BRW_OPCODE_ADD, - this->pixel_x, - fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), - fs_reg(brw_imm_v(0x10101010)))); - emit(fs_inst(BRW_OPCODE_ADD, - this->pixel_y, - fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), - fs_reg(brw_imm_v(0x11001100)))); + emit(BRW_OPCODE_ADD, + this->pixel_x, + fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), + fs_reg(brw_imm_v(0x10101010))); + emit(BRW_OPCODE_ADD, + this->pixel_y, + fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), + fs_reg(brw_imm_v(0x11001100))); this->current_annotation = "compute pixel deltas from v0"; if (brw->has_pln) { @@ -1987,22 +1941,18 @@ fs_visitor::emit_interpolation_setup_gen4() this->delta_x = fs_reg(this, glsl_type::float_type); this->delta_y = fs_reg(this, glsl_type::float_type); } - emit(fs_inst(BRW_OPCODE_ADD, - this->delta_x, - this->pixel_x, - fs_reg(negate(brw_vec1_grf(1, 0))))); - emit(fs_inst(BRW_OPCODE_ADD, - this->delta_y, - this->pixel_y, - fs_reg(negate(brw_vec1_grf(1, 1))))); + emit(BRW_OPCODE_ADD, this->delta_x, + this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0)))); + emit(BRW_OPCODE_ADD, this->delta_y, + this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1)))); this->current_annotation = "compute pos.w and 1/pos.w"; /* Compute wpos.w. It's always in our setup, since it's needed to * interpolate the other attributes. */ this->wpos_w = fs_reg(this, glsl_type::float_type); - emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y, - interp_reg(FRAG_ATTRIB_WPOS, 3))); + emit(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y, + interp_reg(FRAG_ATTRIB_WPOS, 3)); /* Compute the pixel 1/W value from wpos.w. */ this->pixel_w = fs_reg(this, glsl_type::float_type); emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); @@ -2021,14 +1971,14 @@ fs_visitor::emit_interpolation_setup_gen6() fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type); int_pixel_x.type = BRW_REGISTER_TYPE_UW; int_pixel_y.type = BRW_REGISTER_TYPE_UW; - emit(fs_inst(BRW_OPCODE_ADD, - int_pixel_x, - fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), - fs_reg(brw_imm_v(0x10101010)))); - emit(fs_inst(BRW_OPCODE_ADD, - int_pixel_y, - fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), - fs_reg(brw_imm_v(0x11001100)))); + emit(BRW_OPCODE_ADD, + int_pixel_x, + fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), + fs_reg(brw_imm_v(0x10101010))); + emit(BRW_OPCODE_ADD, + int_pixel_y, + fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), + fs_reg(brw_imm_v(0x11001100))); /* As of gen6, we can no longer mix float and int sources. We have * to turn the integer pixel centers into floats for their actual @@ -2036,13 +1986,13 @@ fs_visitor::emit_interpolation_setup_gen6() */ this->pixel_x = fs_reg(this, glsl_type::float_type); this->pixel_y = fs_reg(this, glsl_type::float_type); - emit(fs_inst(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x)); - emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y)); + emit(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x); + emit(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y); - this->current_annotation = "compute 1/pos.w"; - this->wpos_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); - this->pixel_w = fs_reg(this, glsl_type::float_type); - emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); + this->current_annotation = "compute pos.w"; + this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); + this->wpos_w = fs_reg(this, glsl_type::float_type); + emit_math(FS_OPCODE_RCP, this->wpos_w, this->pixel_w); this->delta_x = fs_reg(brw_vec8_grf(2, 0)); this->delta_y = fs_reg(brw_vec8_grf(3, 0)); @@ -2069,8 +2019,8 @@ fs_visitor::emit_fb_writes() } if (c->aa_dest_stencil_reg) { - emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), - fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)))); + emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++), + fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0))); } /* Reserve space for color. It'll be filled in per MRT below. */ @@ -2083,17 +2033,17 @@ fs_visitor::emit_fb_writes() assert(this->frag_depth); fs_reg depth = *(variable_storage(this->frag_depth)); - emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth)); + emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth); } else { /* Pass through the payload depth. */ - emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), - fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); + emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++), + fs_reg(brw_vec8_grf(c->source_depth_reg, 0))); } } if (c->dest_depth_reg) { - emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), - fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)))); + emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++), + fs_reg(brw_vec8_grf(c->dest_depth_reg, 0))); } fs_reg color = reg_undef; @@ -2110,9 +2060,7 @@ fs_visitor::emit_fb_writes() target); if (this->frag_color || this->frag_data) { for (int i = 0; i < 4; i++) { - emit(fs_inst(BRW_OPCODE_MOV, - fs_reg(MRF, color_mrf + i), - color)); + emit(BRW_OPCODE_MOV, fs_reg(MRF, color_mrf + i), color); color.reg_offset++; } } @@ -2120,8 +2068,7 @@ fs_visitor::emit_fb_writes() if (this->frag_color) color.reg_offset -= 4; - fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, - reg_undef, reg_undef)); + fs_inst *inst = emit(FS_OPCODE_FB_WRITE); inst->target = target; inst->base_mrf = 0; inst->mlen = nr; @@ -2137,13 +2084,10 @@ fs_visitor::emit_fb_writes() * renderbuffer. */ color.reg_offset += 3; - emit(fs_inst(BRW_OPCODE_MOV, - fs_reg(MRF, color_mrf + 3), - color)); + emit(BRW_OPCODE_MOV, fs_reg(MRF, color_mrf + 3), color); } - fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, - reg_undef, reg_undef)); + fs_inst *inst = emit(FS_OPCODE_FB_WRITE); inst->base_mrf = 0; inst->mlen = nr; inst->eot = true; @@ -2868,8 +2812,7 @@ fs_visitor::calculate_live_intervals() if (inst->src[i].file == GRF && inst->src[i].reg != 0) { int reg = inst->src[i].reg; - if (!loop_depth || (this->virtual_grf_sizes[reg] == 1 && - def[reg] >= bb_header_ip)) { + if (!loop_depth) { use[reg] = ip; } else { def[reg] = MIN2(loop_start, def[reg]); @@ -2885,8 +2828,7 @@ fs_visitor::calculate_live_intervals() if (inst->dst.file == GRF && inst->dst.reg != 0) { int reg = inst->dst.reg; - if (!loop_depth || (this->virtual_grf_sizes[reg] == 1 && - !inst->predicated)) { + if (!loop_depth) { def[reg] = MIN2(def[reg], ip); } else { def[reg] = MIN2(def[reg], loop_start); @@ -2996,12 +2938,41 @@ fs_visitor::propagate_constants() progress = true; } break; + case BRW_OPCODE_CMP: + if (i == 1) { + scan_inst->src[i] = inst->src[0]; + progress = true; + } else if (i == 0 && scan_inst->src[1].file != IMM) { + uint32_t new_cmod; + + new_cmod = brw_swap_cmod(scan_inst->conditional_mod); + if (new_cmod != ~0u) { + /* Fit this constant in by swapping the operands and + * flipping the test + */ + scan_inst->src[0] = scan_inst->src[1]; + scan_inst->src[1] = inst->src[0]; + scan_inst->conditional_mod = new_cmod; + progress = true; + } + } + break; + case BRW_OPCODE_SEL: if (i == 1) { scan_inst->src[i] = inst->src[0]; progress = true; + } else if (i == 0 && scan_inst->src[1].file != IMM) { + /* Fit this constant in by swapping the operands and + * flipping the predicate + */ + scan_inst->src[0] = scan_inst->src[1]; + scan_inst->src[1] = inst->src[0]; + scan_inst->predicate_inverse = !scan_inst->predicate_inverse; + progress = true; } + break; } } @@ -3487,6 +3458,7 @@ fs_visitor::generate_code() brw_set_conditionalmod(p, inst->conditional_mod); brw_set_predicate_control(p, inst->predicated); + brw_set_predicate_inverse(p, inst->predicate_inverse); brw_set_saturate(p, inst->saturate); switch (inst->opcode) { @@ -3677,7 +3649,7 @@ fs_visitor::generate_code() } else { _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode); } - this->fail = true; + fail("unsupported opcode in FS\n"); } if (unlikely(INTEL_DEBUG & DEBUG_WM)) { @@ -3808,18 +3780,18 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) v.assign_regs_trivial(); else { while (!v.assign_regs()) { - if (v.fail) + if (v.failed) break; } } } - if (!v.fail) + if (!v.failed) v.generate_code(); - assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */ + assert(!v.failed); /* FINISHME: Cleanly fail, tested at link time, etc. */ - if (v.fail) + if (v.failed) return GL_FALSE; c->prog_data.total_grf = v.grf_used; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index dc030ae..f792906 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -331,6 +331,7 @@ public: fs_reg src[3]; bool saturate; bool predicated; + bool predicate_inverse; int conditional_mod; /**< BRW_CONDITIONAL_* */ int mlen; /**< SEND message length */ @@ -364,7 +365,7 @@ public: this->ctx = &intel->ctx; this->mem_ctx = ralloc_context(NULL); this->shader = shader; - this->fail = false; + this->failed = false; this->variable_ht = hash_table_ctor(0, hash_table_pointer_hash, hash_table_pointer_compare); @@ -432,6 +433,32 @@ public: void visit(ir_function_signature *ir); fs_inst *emit(fs_inst inst); + + fs_inst *emit(int opcode) + { + return emit(fs_inst(opcode)); + } + + fs_inst *emit(int opcode, fs_reg dst) + { + return emit(fs_inst(opcode, dst)); + } + + fs_inst *emit(int opcode, fs_reg dst, fs_reg src0) + { + return emit(fs_inst(opcode, dst, src0)); + } + + fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) + { + return emit(fs_inst(opcode, dst, src0, src1)); + } + + fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) + { + return emit(fs_inst(opcode, dst, src0, src1, src2)); + } + void setup_paramvalues_refs(); void assign_curb_setup(); void calculate_urb_setup(); @@ -450,6 +477,7 @@ public: bool remove_duplicate_mrf_writes(); bool virtual_grf_interferes(int a, int b); void schedule_instructions(); + void fail(const char *msg, ...); void generate_code(); void generate_fb_write(fs_inst *inst); @@ -523,7 +551,7 @@ public: ir_instruction *base_ir; /** @} */ - bool fail; + bool failed; /* Result of last visit() method. */ fs_reg result; diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index f027742..67f29ce 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -119,8 +119,7 @@ fs_visitor::assign_regs() } if (i == class_count) { if (this->virtual_grf_sizes[r] >= base_reg_count) { - fprintf(stderr, "Object too large to register allocate.\n"); - this->fail = true; + fail("Object too large to register allocate.\n"); } class_sizes[class_count++] = this->virtual_grf_sizes[r]; @@ -226,8 +225,9 @@ fs_visitor::assign_regs() * loop back into here to try again. */ int reg = choose_spill_reg(g); - if (reg == -1 || intel->gen >= 6) { - this->fail = true; + + if (reg == -1) { + fail("no register to spill\n"); } else { spill_reg(reg); } diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 70c451d..14ee676 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -83,45 +83,23 @@ static void compile_gs_prog( struct brw_context *brw, /* Note that primitives which don't require a GS program have * already been weeded out by this stage: */ + + /* Gen6: VF has already converted into polygon, and LINELOOP is + * converted to LINESTRIP at the beginning of the 3D pipeline. + */ + if (intel->gen == 6) + return; + switch (key->primitive) { case GL_QUADS: - /* Gen6: VF has already converted into polygon. */ - if (intel->gen == 6) - return; brw_gs_quads( &c, key ); break; case GL_QUAD_STRIP: - if (intel->gen == 6) - return; brw_gs_quad_strip( &c, key ); break; case GL_LINE_LOOP: - /* Gen6: LINELOOP is converted to LINESTRIP at the beginning of the 3D pipeline */ - if (intel->gen == 6) - return; brw_gs_lines( &c ); break; - case GL_LINES: - if (key->hint_gs_always) - brw_gs_lines( &c ); - else { - return; - } - break; - case GL_TRIANGLES: - if (key->hint_gs_always) - brw_gs_tris( &c ); - else { - return; - } - break; - case GL_POINTS: - if (key->hint_gs_always) - brw_gs_points( &c ); - else { - return; - } - break; default: return; } @@ -170,7 +148,6 @@ static void populate_key( struct brw_context *brw, { struct gl_context *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; - int prim_gs_always; memset(key, 0, sizeof(*key)); @@ -180,8 +157,6 @@ static void populate_key( struct brw_context *brw, /* BRW_NEW_PRIMITIVE */ key->primitive = gs_prim[brw->primitive]; - key->hint_gs_always = 0; /* debug code? */ - /* _NEW_LIGHT */ key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); if (key->primitive == GL_QUADS && ctx->Light.ShadeModel != GL_FLAT) { @@ -191,14 +166,11 @@ static void populate_key( struct brw_context *brw, key->pv_first = GL_TRUE; } - if (intel->gen == 6) - prim_gs_always = 0; - else - prim_gs_always = brw->primitive == GL_QUADS || - brw->primitive == GL_QUAD_STRIP || - brw->primitive == GL_LINE_LOOP; - - key->need_gs_prog = (key->hint_gs_always || prim_gs_always); + key->need_gs_prog = (intel->gen == 6) + ? 0 + : (brw->primitive == GL_QUADS || + brw->primitive == GL_QUAD_STRIP || + brw->primitive == GL_LINE_LOOP); } /* Calculate interpolants for triangle and line rasterization. diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h index 7e35310..c33528e 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.h +++ b/src/mesa/drivers/dri/i965/brw_gs.h @@ -42,10 +42,9 @@ struct brw_gs_prog_key { GLbitfield64 attrs; GLuint primitive:4; - GLuint hint_gs_always:1; GLuint pv_first:1; GLuint need_gs_prog:1; - GLuint pad:25; + GLuint pad:26; }; struct brw_gs_compile { @@ -70,8 +69,6 @@ struct brw_gs_compile { void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key ); void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key ); -void brw_gs_tris( struct brw_gs_compile *c ); void brw_gs_lines( struct brw_gs_compile *c ); -void brw_gs_points( struct brw_gs_compile *c ); #endif diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c index e1f751f..3bb526b 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c @@ -193,19 +193,6 @@ void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key ) } } -void brw_gs_tris( struct brw_gs_compile *c ) -{ - struct intel_context *intel = &c->func.brw->intel; - - brw_gs_alloc_regs(c, 3); - - if (intel->needs_ff_sync) - brw_gs_ff_sync(c, 1); - brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START)); - brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2)); - brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END)); -} - void brw_gs_lines( struct brw_gs_compile *c ) { struct intel_context *intel = &c->func.brw->intel; @@ -217,22 +204,3 @@ void brw_gs_lines( struct brw_gs_compile *c ) brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START)); brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END)); } - -void brw_gs_points( struct brw_gs_compile *c ) -{ - struct intel_context *intel = &c->func.brw->intel; - - brw_gs_alloc_regs(c, 1); - - if (intel->needs_ff_sync) - brw_gs_ff_sync(c, 1); - brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END)); -} - - - - - - - - diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index c768be2..19eea07 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -149,7 +149,8 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) else OUT_BATCH(0); OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); - OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + brw->sf.state_offset); OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, brw->cc.state_offset); @@ -247,8 +248,7 @@ static void emit_depthbuffer(struct brw_context *brw) } assert(region->tiling != I915_TILING_X); - if (intel->gen >= 6) - assert(region->tiling != I915_TILING_NONE); + assert(intel->gen < 6 || region->tiling == I915_TILING_Y); BEGIN_BATCH(len); OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2)); @@ -283,6 +283,9 @@ static void emit_depthbuffer(struct brw_context *brw) } } +/** + * \see brw_context.state.depth_region + */ const struct brw_tracked_state brw_depthbuffer = { .dirty = { .mesa = 0, diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index ee68095..6674f16 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -134,11 +134,6 @@ static GLboolean brwProgramStringNotify( struct gl_context *ctx, brw_fragment_program_const(brw->fragment_program); struct gl_shader_program *shader_program; - if (fprog->FogOption) { - _mesa_append_fog_code(ctx, fprog); - fprog->FogOption = GL_NONE; - } - if (newFP == curFP) brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; newFP->id = brw->program_id++; diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index bd3a21e..66d91a0 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -38,14 +38,16 @@ static void upload_sf_vp(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; struct gl_context *ctx = &brw->intel.ctx; const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; - struct brw_sf_viewport sfv; + struct brw_sf_viewport *sfv; GLfloat y_scale, y_bias; const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); const GLfloat *v = ctx->Viewport._WindowMap.m; - memset(&sfv, 0, sizeof(sfv)); + sfv = brw_state_batch(brw, sizeof(*sfv), 32, &brw->sf.vp_offset); + memset(sfv, 0, sizeof(*sfv)); if (render_to_fbo) { y_scale = 1.0; @@ -58,12 +60,12 @@ static void upload_sf_vp(struct brw_context *brw) /* _NEW_VIEWPORT */ - sfv.viewport.m00 = v[MAT_SX]; - sfv.viewport.m11 = v[MAT_SY] * y_scale; - sfv.viewport.m22 = v[MAT_SZ] * depth_scale; - sfv.viewport.m30 = v[MAT_TX]; - sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias; - sfv.viewport.m32 = v[MAT_TZ] * depth_scale; + sfv->viewport.m00 = v[MAT_SX]; + sfv->viewport.m11 = v[MAT_SY] * y_scale; + sfv->viewport.m22 = v[MAT_SZ] * depth_scale; + sfv->viewport.m30 = v[MAT_TX]; + sfv->viewport.m31 = v[MAT_TY] * y_scale + y_bias; + sfv->viewport.m32 = v[MAT_TZ] * depth_scale; /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT * for DrawBuffer->_[XY]{min,max} @@ -85,27 +87,31 @@ static void upload_sf_vp(struct brw_context *brw) * anything. Instead, just provide a min > max scissor inside * the bounds, which produces the expected no rendering. */ - sfv.scissor.xmin = 1; - sfv.scissor.xmax = 0; - sfv.scissor.ymin = 1; - sfv.scissor.ymax = 0; + sfv->scissor.xmin = 1; + sfv->scissor.xmax = 0; + sfv->scissor.ymin = 1; + sfv->scissor.ymax = 0; } else if (render_to_fbo) { /* texmemory: Y=0=bottom */ - sfv.scissor.xmin = ctx->DrawBuffer->_Xmin; - sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1; - sfv.scissor.ymin = ctx->DrawBuffer->_Ymin; - sfv.scissor.ymax = ctx->DrawBuffer->_Ymax - 1; + sfv->scissor.xmin = ctx->DrawBuffer->_Xmin; + sfv->scissor.xmax = ctx->DrawBuffer->_Xmax - 1; + sfv->scissor.ymin = ctx->DrawBuffer->_Ymin; + sfv->scissor.ymax = ctx->DrawBuffer->_Ymax - 1; } else { /* memory: Y=0=top */ - sfv.scissor.xmin = ctx->DrawBuffer->_Xmin; - sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1; - sfv.scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax; - sfv.scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1; + sfv->scissor.xmin = ctx->DrawBuffer->_Xmin; + sfv->scissor.xmax = ctx->DrawBuffer->_Xmax - 1; + sfv->scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax; + sfv->scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1; } + /* Keep a pointer to it for brw_state_dump.c */ drm_intel_bo_unreference(brw->sf.vp_bo); - brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, &sfv, sizeof(sfv)); + drm_intel_bo_reference(intel->batch.bo); + brw->sf.vp_bo = intel->batch.bo; + + brw->state.dirty.cache |= CACHE_NEW_SF_VP; } const struct brw_tracked_state brw_sf_vp = { @@ -113,92 +119,44 @@ const struct brw_tracked_state brw_sf_vp = { .mesa = (_NEW_VIEWPORT | _NEW_SCISSOR | _NEW_BUFFERS), - .brw = 0, + .brw = BRW_NEW_BATCH, .cache = 0 }, .prepare = upload_sf_vp }; -struct brw_sf_unit_key { - unsigned int total_grf; - unsigned int urb_entry_read_length; - - unsigned int nr_urb_entries, urb_size, sfsize; - - GLenum front_face, cull_face; - unsigned pv_first:1; - unsigned scissor:1; - unsigned line_smooth:1; - unsigned point_sprite:1; - unsigned use_vs_point_size:1; - unsigned render_to_fbo:1; - float line_width; - float point_size; -}; - -static void -sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) -{ - struct gl_context *ctx = &brw->intel.ctx; - memset(key, 0, sizeof(*key)); - - /* CACHE_NEW_SF_PROG */ - key->total_grf = brw->sf.prog_data->total_grf; - key->urb_entry_read_length = brw->sf.prog_data->urb_read_length; - - /* BRW_NEW_URB_FENCE */ - key->nr_urb_entries = brw->urb.nr_sf_entries; - key->urb_size = brw->urb.vsize; - key->sfsize = brw->urb.sfsize; - - key->scissor = ctx->Scissor.Enabled; - key->front_face = ctx->Polygon.FrontFace; - - if (ctx->Polygon.CullFlag) - key->cull_face = ctx->Polygon.CullFaceMode; - else - key->cull_face = GL_NONE; - - key->line_width = ctx->Line.Width; - key->line_smooth = ctx->Line.SmoothFlag; - - key->point_sprite = ctx->Point.PointSprite; - key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); - key->use_vs_point_size = (ctx->VertexProgram.PointSizeEnabled || - ctx->Point._Attenuated); - - /* _NEW_LIGHT */ - key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); - - key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; -} - -static drm_intel_bo * -sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, - drm_intel_bo **reloc_bufs) +static void upload_sf_unit( struct brw_context *brw ) { struct intel_context *intel = &brw->intel; - struct brw_sf_unit_state sf; - drm_intel_bo *bo; + struct gl_context *ctx = &intel->ctx; + struct brw_sf_unit_state *sf; + drm_intel_bo *bo = intel->batch.bo; int chipset_max_threads; - memset(&sf, 0, sizeof(sf)); + bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; + + sf = brw_state_batch(brw, sizeof(*sf), 64, &brw->sf.state_offset); - sf.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; - sf.thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */ + memset(sf, 0, sizeof(*sf)); + + /* CACHE_NEW_SF_PROG */ + sf->thread0.grf_reg_count = ALIGN(brw->sf.prog_data->total_grf, 16) / 16 - 1; + sf->thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */ - sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + sf->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - sf.thread3.dispatch_grf_start_reg = 3; + sf->thread3.dispatch_grf_start_reg = 3; if (intel->gen == 5) - sf.thread3.urb_entry_read_offset = 3; + sf->thread3.urb_entry_read_offset = 3; else - sf.thread3.urb_entry_read_offset = 1; + sf->thread3.urb_entry_read_offset = 1; - sf.thread3.urb_entry_read_length = key->urb_entry_read_length; + /* CACHE_NEW_SF_PROG */ + sf->thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length; - sf.thread4.nr_urb_entries = key->nr_urb_entries; - sf.thread4.urb_entry_allocation_size = key->sfsize - 1; + /* BRW_NEW_URB_FENCE */ + sf->thread4.nr_urb_entries = brw->urb.nr_sf_entries; + sf->thread4.urb_entry_allocation_size = brw->urb.sfsize - 1; /* Each SF thread produces 1 PUE, and there can be up to 24 (Pre-Ironlake) or * 48 (Ironlake) threads. @@ -208,46 +166,51 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, else chipset_max_threads = 24; - sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1; + /* BRW_NEW_URB_FENCE */ + sf->thread4.max_threads = MIN2(chipset_max_threads, + brw->urb.nr_sf_entries) - 1; if (unlikely(INTEL_DEBUG & DEBUG_SINGLE_THREAD)) - sf.thread4.max_threads = 0; + sf->thread4.max_threads = 0; if (unlikely(INTEL_DEBUG & DEBUG_STATS)) - sf.thread4.stats_enable = 1; + sf->thread4.stats_enable = 1; /* CACHE_NEW_SF_VP */ - sf.sf5.sf_viewport_state_offset = brw->sf.vp_bo->offset >> 5; /* reloc */ + sf->sf5.sf_viewport_state_offset = (brw->sf.vp_bo->offset + + brw->sf.vp_offset) >> 5; /* reloc */ - sf.sf5.viewport_transform = 1; + sf->sf5.viewport_transform = 1; /* _NEW_SCISSOR */ - if (key->scissor) - sf.sf6.scissor = 1; + if (ctx->Scissor.Enabled) + sf->sf6.scissor = 1; /* _NEW_POLYGON */ - if (key->front_face == GL_CCW) - sf.sf5.front_winding = BRW_FRONTWINDING_CCW; + if (ctx->Polygon.FrontFace == GL_CCW) + sf->sf5.front_winding = BRW_FRONTWINDING_CCW; else - sf.sf5.front_winding = BRW_FRONTWINDING_CW; + sf->sf5.front_winding = BRW_FRONTWINDING_CW; - /* The viewport is inverted for rendering to a FBO, and that inverts + /* _NEW_BUFFERS + * The viewport is inverted for rendering to a FBO, and that inverts * polygon front/back orientation. */ - sf.sf5.front_winding ^= key->render_to_fbo; + sf->sf5.front_winding ^= render_to_fbo; - switch (key->cull_face) { + /* _NEW_POLYGON */ + switch (ctx->Polygon.CullFlag ? ctx->Polygon.CullFaceMode : GL_NONE) { case GL_FRONT: - sf.sf6.cull_mode = BRW_CULLMODE_FRONT; + sf->sf6.cull_mode = BRW_CULLMODE_FRONT; break; case GL_BACK: - sf.sf6.cull_mode = BRW_CULLMODE_BACK; + sf->sf6.cull_mode = BRW_CULLMODE_BACK; break; case GL_FRONT_AND_BACK: - sf.sf6.cull_mode = BRW_CULLMODE_BOTH; + sf->sf6.cull_mode = BRW_CULLMODE_BOTH; break; case GL_NONE: - sf.sf6.cull_mode = BRW_CULLMODE_NONE; + sf->sf6.cull_mode = BRW_CULLMODE_NONE; break; default: assert(0); @@ -256,19 +219,18 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, /* _NEW_LINE */ /* XXX use ctx->Const.Min/MaxLineWidth here */ - sf.sf6.line_width = CLAMP(key->line_width, 1.0, 5.0) * (1<<1); + sf->sf6.line_width = CLAMP(ctx->Line.Width, 1.0, 5.0) * (1<<1); - sf.sf6.line_endcap_aa_region_width = 1; - if (key->line_smooth) - sf.sf6.aa_enable = 1; - else if (sf.sf6.line_width <= 0x2) - sf.sf6.line_width = 0; + sf->sf6.line_endcap_aa_region_width = 1; + if (ctx->Line.SmoothFlag) + sf->sf6.aa_enable = 1; + else if (sf->sf6.line_width <= 0x2) + sf->sf6.line_width = 0; /* _NEW_BUFFERS */ - key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; - if (!key->render_to_fbo) { + if (!render_to_fbo) { /* Rendering to an OpenGL window */ - sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT; + sf->sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT; } else { /* If rendering to an FBO, the pixel coordinate system is @@ -290,74 +252,56 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, * incorrectly, which is no worse than occurs without * the value, so we're using it here. */ - sf.sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT; + sf->sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT; } /* XXX clamp max depends on AA vs. non-AA */ /* _NEW_POINT */ - sf.sf7.sprite_point = key->point_sprite; - sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3); - sf.sf7.use_point_size_state = !key->use_vs_point_size; - sf.sf7.aa_line_distance_mode = 0; + sf->sf7.sprite_point = ctx->Point.PointSprite; + sf->sf7.point_size = CLAMP(rint(CLAMP(ctx->Point.Size, + ctx->Point.MinSize, + ctx->Point.MaxSize)), 1, 255) * (1<<3); + sf->sf7.use_point_size_state = !(ctx->VertexProgram.PointSizeEnabled || + ctx->Point._Attenuated); + sf->sf7.aa_line_distance_mode = 0; /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: + * _NEW_LIGHT */ - if (!key->pv_first) { - sf.sf7.trifan_pv = 2; - sf.sf7.linestrip_pv = 1; - sf.sf7.tristrip_pv = 2; + if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) { + sf->sf7.trifan_pv = 2; + sf->sf7.linestrip_pv = 1; + sf->sf7.tristrip_pv = 2; } else { - sf.sf7.trifan_pv = 1; - sf.sf7.linestrip_pv = 0; - sf.sf7.tristrip_pv = 0; + sf->sf7.trifan_pv = 1; + sf->sf7.linestrip_pv = 0; + sf->sf7.tristrip_pv = 0; } - sf.sf7.line_last_pixel_enable = 0; + sf->sf7.line_last_pixel_enable = 0; /* Set bias for OpenGL rasterization rules: */ - sf.sf6.dest_org_vbias = 0x8; - sf.sf6.dest_org_hbias = 0x8; - - bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT, - key, sizeof(*key), - reloc_bufs, 2, - &sf, sizeof(sf)); + sf->sf6.dest_org_vbias = 0x8; + sf->sf6.dest_org_hbias = 0x8; /* STATE_PREFETCH command description describes this state as being * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain. */ /* Emit SF program relocation */ - drm_intel_bo_emit_reloc(bo, offsetof(struct brw_sf_unit_state, thread0), - brw->sf.prog_bo, sf.thread0.grf_reg_count << 1, + drm_intel_bo_emit_reloc(bo, (brw->sf.state_offset + + offsetof(struct brw_sf_unit_state, thread0)), + brw->sf.prog_bo, sf->thread0.grf_reg_count << 1, I915_GEM_DOMAIN_INSTRUCTION, 0); /* Emit SF viewport relocation */ - drm_intel_bo_emit_reloc(bo, offsetof(struct brw_sf_unit_state, sf5), - brw->sf.vp_bo, (sf.sf5.front_winding | - (sf.sf5.viewport_transform << 1)), + drm_intel_bo_emit_reloc(bo, (brw->sf.state_offset + + offsetof(struct brw_sf_unit_state, sf5)), + intel->batch.bo, (brw->sf.vp_offset | + sf->sf5.front_winding | + (sf->sf5.viewport_transform << 1)), I915_GEM_DOMAIN_INSTRUCTION, 0); - return bo; -} - -static void upload_sf_unit( struct brw_context *brw ) -{ - struct brw_sf_unit_key key; - drm_intel_bo *reloc_bufs[2]; - - sf_unit_populate_key(brw, &key); - - reloc_bufs[0] = brw->sf.prog_bo; - reloc_bufs[1] = brw->sf.vp_bo; - - drm_intel_bo_unreference(brw->sf.state_bo); - brw->sf.state_bo = brw_search_cache(&brw->cache, BRW_SF_UNIT, - &key, sizeof(key), - reloc_bufs, 2, - NULL); - if (brw->sf.state_bo == NULL) { - brw->sf.state_bo = sf_unit_create_from_key(brw, &key, reloc_bufs); - } + brw->state.dirty.cache |= CACHE_NEW_SF_UNIT; } const struct brw_tracked_state brw_sf_unit = { @@ -368,7 +312,8 @@ const struct brw_tracked_state brw_sf_unit = { _NEW_POINT | _NEW_SCISSOR | _NEW_BUFFERS), - .brw = BRW_NEW_URB_FENCE, + .brw = (BRW_NEW_BATCH | + BRW_NEW_URB_FENCE), .cache = (CACHE_NEW_SF_VP | CACHE_NEW_SF_PROG) }, diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index fdce79d..b393259 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -195,8 +195,8 @@ static void dump_sf_viewport_state(struct brw_context *brw) drm_intel_bo_map(brw->sf.vp_bo, GL_FALSE); - vp = brw->sf.vp_bo->virtual; - vp_off = brw->sf.vp_bo->offset; + vp = brw->sf.vp_bo->virtual + brw->sf.vp_offset; + vp_off = brw->sf.vp_bo->offset + brw->sf.vp_offset; state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11); diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 6687a89..8d4797fb 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -80,25 +80,6 @@ struct brw_3d_control GLuint dword3; }; - -struct brw_3d_primitive -{ - struct - { - GLuint length:8; - GLuint pad:2; - GLuint topology:5; - GLuint indexed:1; - GLuint opcode:16; - } header; - - GLuint verts_per_instance; - GLuint start_vert_location; - GLuint instance_count; - GLuint start_instance_location; - GLuint base_vert_location; -}; - /* These seem to be passed around as function args, so it works out * better to keep them as #defines: */ @@ -1247,31 +1228,6 @@ struct brw_surface_state }; - -struct brw_vertex_buffer_state -{ - struct { - GLuint pitch:11; - GLuint pad:15; - GLuint access_type:1; - GLuint vb_index:5; - } vb0; - - GLuint start_addr; - GLuint max_index; -#if 1 - GLuint instance_data_step_rate; /* not included for sequential/random vertices? */ -#endif -}; - -#define BRW_VBP_MAX 17 - -struct brw_vb_array_state { - struct header header; - struct brw_vertex_buffer_state vb[BRW_VBP_MAX]; -}; - - struct brw_vertex_element_state { struct diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 6ae75d2..63ae131 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -132,6 +132,9 @@ static void brw_upload_vs_prog(struct brw_context *brw) ctx->Polygon.BackMode != GL_FILL); key.two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); + /* _NEW_LIGHT | _NEW_BUFFERS */ + key.clamp_vertex_color = ctx->Light._ClampVertexColor; + /* _NEW_POINT */ if (ctx->Point.PointSprite) { for (i = 0; i < 8; i++) { @@ -158,7 +161,8 @@ static void brw_upload_vs_prog(struct brw_context *brw) */ const struct brw_tracked_state brw_vs_prog = { .dirty = { - .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT | _NEW_LIGHT, + .mesa = (_NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT | _NEW_LIGHT | + _NEW_BUFFERS), .brw = BRW_NEW_VERTEX_PROGRAM, .cache = 0 }, diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 0b88cc1..7ca84a5 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -45,6 +45,7 @@ struct brw_vs_prog_key { GLuint copy_edgeflag:1; GLuint point_coord_replace:8; GLuint two_side_color: 1; + GLuint clamp_vertex_color:1; }; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index acacf37..dd4e1e6 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -437,8 +437,14 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) if (c->key.nr_userclip) header_regs += 2; + /* Each attribute is 16 bytes (1 vec4), so dividing by 8 gives us the + * number of 128-byte (1024-bit) units. + */ c->prog_data.urb_entry_size = (attributes_in_vue + header_regs + 7) / 8; } else if (intel->gen == 5) + /* Each attribute is 16 bytes (1 vec4), so dividing by 4 gives us the + * number of 64-byte (512-bit) units. + */ c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; else c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; @@ -2215,7 +2221,8 @@ void brw_vs_emit(struct brw_vs_compile *c ) * instructions. Instead, we directly modify the header * of the last (already stored) instruction. */ - if (inst->DstReg.File == PROGRAM_OUTPUT) { + if (inst->DstReg.File == PROGRAM_OUTPUT && + c->key.clamp_vertex_color) { if ((inst->DstReg.Index == VERT_RESULT_COL0) || (inst->DstReg.Index == VERT_RESULT_COL1) || (inst->DstReg.Index == VERT_RESULT_BFC0) diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 152ee14..ce8712a 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -115,13 +115,11 @@ static void brw_set_draw_region( struct intel_context *intel, { struct brw_context *brw = brw_context(&intel->ctx); - /* release old color/depth regions */ - if (brw->state.depth_region != depth_region) + if (brw->state.depth_region != depth_region) { brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER; - intel_region_release(&brw->state.depth_region); - - /* reference new color/depth regions */ - intel_region_reference(&brw->state.depth_region, depth_region); + intel_region_release(&brw->state.depth_region); + intel_region_reference(&brw->state.depth_region, depth_region); + } } diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index ca51d15..65af227 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -185,6 +185,7 @@ static void do_wm_prog( struct brw_context *brw, struct brw_fragment_program *fp, struct brw_wm_prog_key *key) { + struct intel_context *intel = &brw->intel; struct brw_wm_compile *c; const GLuint *program; GLuint program_size; @@ -238,12 +239,26 @@ static void do_wm_prog( struct brw_context *brw, /* Scratch space is used for register spilling */ if (c->last_scratch) { + uint32_t total_scratch; + /* Per-thread scratch space is power-of-two sized. */ for (c->prog_data.total_scratch = 1024; c->prog_data.total_scratch <= c->last_scratch; c->prog_data.total_scratch *= 2) { /* empty */ } + total_scratch = c->prog_data.total_scratch * brw->wm_max_threads; + + if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) { + drm_intel_bo_unreference(brw->wm.scratch_bo); + brw->wm.scratch_bo = NULL; + } + if (brw->wm.scratch_bo == NULL) { + brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr, + "wm scratch", + total_scratch, + 4096); + } } else { c->prog_data.total_scratch = 0; @@ -348,6 +363,9 @@ static void brw_wm_populate_key( struct brw_context *brw, /* _NEW_HINT */ key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST); + /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */ + key->clamp_fragment_color = ctx->Color._ClampFragmentColor; + /* _NEW_TEXTURE */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { const struct gl_texture_unit *unit = &ctx->Texture.Unit[i]; @@ -370,14 +388,14 @@ static void brw_wm_populate_key( struct brw_context *brw, * well and our shadow compares always return the result in * all 4 channels. */ - if (t->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) { - if (t->DepthMode == GL_ALPHA) { + if (t->Sampler.CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) { + if (t->Sampler.DepthMode == GL_ALPHA) { swizzles[0] = SWIZZLE_ZERO; swizzles[1] = SWIZZLE_ZERO; swizzles[2] = SWIZZLE_ZERO; - } else if (t->DepthMode == GL_LUMINANCE) { + } else if (t->Sampler.DepthMode == GL_LUMINANCE) { swizzles[3] = SWIZZLE_ONE; - } else if (t->DepthMode == GL_RED) { + } else if (t->Sampler.DepthMode == GL_RED) { /* See table 3.23 of the GL 3.0 spec. */ swizzles[1] = SWIZZLE_ZERO; swizzles[2] = SWIZZLE_ZERO; @@ -471,6 +489,7 @@ const struct brw_tracked_state brw_wm_prog = { _NEW_POLYGON | _NEW_LINE | _NEW_LIGHT | + _NEW_FRAG_CLAMP | _NEW_BUFFERS | _NEW_TEXTURE), .brw = (BRW_NEW_FRAGMENT_PROGRAM | diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index c40d7bf..40659f2 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -65,6 +65,7 @@ struct brw_wm_prog_key { GLuint nr_color_regions:5; GLuint render_to_fbo:1; GLuint alpha_test:1; + GLuint clamp_fragment_color:1; GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */ GLuint shadowtex_mask:16; diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index ecfd21d..cdc1f36 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -1408,6 +1408,9 @@ void emit_fb_write(struct brw_wm_compile *c, */ brw_push_insn_state(p); + if (c->key.clamp_fragment_color) + brw_set_saturate(p, 1); + for (channel = 0; channel < 4; channel++) { if (intel->gen >= 6) { /* gen6 SIMD16 single source DP write looks like: @@ -1459,6 +1462,9 @@ void emit_fb_write(struct brw_wm_compile *c, } } } + + brw_set_saturate(p, 0); + /* skip over the regs populated above: */ if (c->dispatch_width == 16) diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 30672b4..cfc30d8 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -288,26 +288,26 @@ brw_wm_sampler_populate_key(struct brw_context *brw, entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? ctx->Texture.CubeMapSeamless : GL_FALSE; - entry->wrap_r = texObj->WrapR; - entry->wrap_s = texObj->WrapS; - entry->wrap_t = texObj->WrapT; - - entry->maxlod = texObj->MaxLod; - entry->minlod = texObj->MinLod; - entry->lod_bias = texUnit->LodBias + texObj->LodBias; - entry->max_aniso = texObj->MaxAnisotropy; - entry->minfilter = texObj->MinFilter; - entry->magfilter = texObj->MagFilter; - entry->comparemode = texObj->CompareMode; - entry->comparefunc = texObj->CompareFunc; + entry->wrap_r = texObj->Sampler.WrapR; + entry->wrap_s = texObj->Sampler.WrapS; + entry->wrap_t = texObj->Sampler.WrapT; + + entry->maxlod = texObj->Sampler.MaxLod; + entry->minlod = texObj->Sampler.MinLod; + entry->lod_bias = texUnit->LodBias + texObj->Sampler.LodBias; + entry->max_aniso = texObj->Sampler.MaxAnisotropy; + entry->minfilter = texObj->Sampler.MinFilter; + entry->magfilter = texObj->Sampler.MagFilter; + entry->comparemode = texObj->Sampler.CompareMode; + entry->comparefunc = texObj->Sampler.CompareFunc; drm_intel_bo_unreference(brw->wm.sdc_bo[unit]); if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { float bordercolor[4] = { - texObj->BorderColor.f[0], - texObj->BorderColor.f[0], - texObj->BorderColor.f[0], - texObj->BorderColor.f[0] + texObj->Sampler.BorderColor.f[0], + texObj->Sampler.BorderColor.f[0], + texObj->Sampler.BorderColor.f[0], + texObj->Sampler.BorderColor.f[0] }; /* GL specs that border color for depth textures is taken from the * R channel, while the hardware uses A. Spam R into all the @@ -316,7 +316,7 @@ brw_wm_sampler_populate_key(struct brw_context *brw, brw->wm.sdc_bo[unit] = upload_default_color(brw, bordercolor); } else { brw->wm.sdc_bo[unit] = upload_default_color(brw, - texObj->BorderColor.f); + texObj->Sampler.BorderColor.f); } key->sampler_count = unit + 1; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 5b5afc4..be4b260 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -278,30 +278,10 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, static void upload_wm_unit( struct brw_context *brw ) { - struct intel_context *intel = &brw->intel; struct brw_wm_unit_key key; drm_intel_bo *reloc_bufs[3]; wm_unit_populate_key(brw, &key); - /* Allocate the necessary scratch space if we haven't already. Don't - * bother reducing the allocation later, since we use scratch so - * rarely. - */ - if (key.total_scratch) { - GLuint total = key.total_scratch * brw->wm_max_threads; - - if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) { - drm_intel_bo_unreference(brw->wm.scratch_bo); - brw->wm.scratch_bo = NULL; - } - if (brw->wm.scratch_bo == NULL) { - brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr, - "wm scratch", - total, - 4096); - } - } - reloc_bufs[0] = brw->wm.prog_bo; reloc_bufs[1] = brw->wm.scratch_bo; reloc_bufs[2] = brw->wm.sampler_bo; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 1010d9f..e3396a3 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -100,18 +100,37 @@ static uint32_t brw_format_for_mesa_format[MESA_FORMAT_COUNT] = [MESA_FORMAT_SLA8] = BRW_SURFACEFORMAT_L8A8_UNORM_SRGB, [MESA_FORMAT_SL8] = BRW_SURFACEFORMAT_L8_UNORM_SRGB, [MESA_FORMAT_DUDV8] = BRW_SURFACEFORMAT_R8G8_SNORM, + [MESA_FORMAT_SIGNED_R8] = BRW_SURFACEFORMAT_R8_SNORM, + [MESA_FORMAT_SIGNED_RG88_REV] = BRW_SURFACEFORMAT_R8G8_SNORM, [MESA_FORMAT_SIGNED_RGBA8888_REV] = BRW_SURFACEFORMAT_R8G8B8A8_SNORM, + [MESA_FORMAT_SIGNED_R16] = BRW_SURFACEFORMAT_R16_SNORM, + [MESA_FORMAT_SIGNED_GR1616] = BRW_SURFACEFORMAT_R16G16_SNORM, + [MESA_FORMAT_RGBA_FLOAT32] = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT, + [MESA_FORMAT_RG_FLOAT32] = BRW_SURFACEFORMAT_R32G32_FLOAT, + [MESA_FORMAT_R_FLOAT32] = BRW_SURFACEFORMAT_R32_FLOAT, + [MESA_FORMAT_INTENSITY_FLOAT32] = BRW_SURFACEFORMAT_I32_FLOAT, + [MESA_FORMAT_LUMINANCE_FLOAT32] = BRW_SURFACEFORMAT_L32_FLOAT, + [MESA_FORMAT_ALPHA_FLOAT32] = BRW_SURFACEFORMAT_A32_FLOAT, + [MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32] = BRW_SURFACEFORMAT_L32A32_FLOAT, }; bool brw_render_target_supported(gl_format format) { + /* These are not color render targets like the table holds, but we + * ask the question for FBO completeness. + */ if (format == MESA_FORMAT_S8_Z24 || format == MESA_FORMAT_X8_Z24 || format == MESA_FORMAT_Z16) { return true; } + /* The value of this BRW_SURFACEFORMAT is 0, so hardcode it. + */ + if (format == MESA_FORMAT_RGBA_FLOAT32) + return true; + /* Not exactly true, as some of those formats are not renderable. * But at least we know how to translate them. */ @@ -155,6 +174,13 @@ static GLuint translate_tex_format( gl_format mesa_format, return brw_format_for_mesa_format[mesa_format]; else if (srgb_decode == GL_SKIP_DECODE_EXT) return brw_format_for_mesa_format[_mesa_get_srgb_format_linear(mesa_format)]; + + case MESA_FORMAT_RGBA_FLOAT32: + /* The value of this BRW_SURFACEFORMAT is 0, which tricks the + * assertion below. + */ + return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + default: assert(brw_format_for_mesa_format[mesa_format] != 0); return brw_format_for_mesa_format[mesa_format]; @@ -197,8 +223,9 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit ) surf->ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; surf->ss0.surface_type = translate_tex_target(tObj->Target); surf->ss0.surface_format = translate_tex_format(firstImage->TexFormat, - firstImage->InternalFormat, - tObj->DepthMode, tObj->sRGBDecode); + firstImage->InternalFormat, + tObj->Sampler.DepthMode, + tObj->Sampler.sRGBDecode); /* This is ok for all textures with channel width 8bit or less: */ @@ -425,6 +452,14 @@ brw_update_renderbuffer_surface(struct brw_context *brw, */ surf->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; break; + case MESA_FORMAT_INTENSITY_FLOAT32: + case MESA_FORMAT_LUMINANCE_FLOAT32: + /* For these formats, we just need to read/write the first + * channel into R, which is to say that we just treat them as + * GL_RED. + */ + surf->ss0.surface_format = BRW_SURFACEFORMAT_R32_FLOAT; + break; case MESA_FORMAT_SARGB8: /* without GL_EXT_framebuffer_sRGB we shouldn't bind sRGB surfaces to the blend/update as sRGB */ @@ -434,8 +469,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, surf->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; break; default: + assert(brw_render_target_supported(irb->Base.Format)); surf->ss0.surface_format = brw_format_for_mesa_format[irb->Base.Format]; - assert(surf->ss0.surface_format != 0); } surf->ss0.surface_type = BRW_SURFACE_2D; diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c index d1648a1..1b935fb 100644 --- a/src/mesa/drivers/dri/i965/gen6_cc.c +++ b/src/mesa/drivers/dri/i965/gen6_cc.c @@ -210,10 +210,10 @@ color_calc_state_populate_key(struct brw_context *brw, if (ctx->Color.AlphaEnabled) key->alpha_ref = ctx->Color.AlphaRef; - key->blend_constant_color[0] = ctx->Color.BlendColor[0]; - key->blend_constant_color[1] = ctx->Color.BlendColor[1]; - key->blend_constant_color[2] = ctx->Color.BlendColor[2]; - key->blend_constant_color[3] = ctx->Color.BlendColor[3]; + key->blend_constant_color[0] = ctx->Color.BlendColorUnclamped[0]; + key->blend_constant_color[1] = ctx->Color.BlendColorUnclamped[1]; + key->blend_constant_color[2] = ctx->Color.BlendColorUnclamped[2]; + key->blend_constant_color[3] = ctx->Color.BlendColorUnclamped[3]; } /** diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c index 7296c7c..c1d0a73 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c @@ -44,38 +44,22 @@ upload_gs_state(struct brw_context *brw) OUT_BATCH(0); ADVANCE_BATCH(); - if (brw->gs.prog_bo) { - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); - OUT_RELOC(brw->gs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_BATCH(GEN6_GS_SPF_MODE | - (0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | - (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - OUT_BATCH(0); /* scratch space base offset */ - OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | - (brw->gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT) | - (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT)); - OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) | - GEN6_GS_STATISTICS_ENABLE | - GEN6_GS_RENDERING_ENABLE); - OUT_BATCH(GEN6_GS_ENABLE); - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); - OUT_BATCH(0); /* prog_bo */ - OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | - (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - OUT_BATCH(0); /* scratch space base offset */ - OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | - (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) | - (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT)); - OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) | - GEN6_GS_STATISTICS_ENABLE | - GEN6_GS_RENDERING_ENABLE); - OUT_BATCH(0); - ADVANCE_BATCH(); - } + // GS should never be used on Gen6. Disable it. + assert(brw->gs.prog_bo == NULL); + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); + OUT_BATCH(0); /* prog_bo */ + OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | + (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + OUT_BATCH(0); /* scratch space base offset */ + OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | + (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) | + (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT)); + OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) | + GEN6_GS_STATISTICS_ENABLE | + GEN6_GS_RENDERING_ENABLE); + OUT_BATCH(0); + ADVANCE_BATCH(); } const struct brw_tracked_state gen6_gs_state = { diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c index c3819f9..909e1bb 100644 --- a/src/mesa/drivers/dri/i965/gen6_urb.c +++ b/src/mesa/drivers/dri/i965/gen6_urb.c @@ -34,26 +34,25 @@ static void prepare_urb( struct brw_context *brw ) { - int urb_size, max_urb_entry; - struct intel_context *intel = &brw->intel; - - if (IS_GT1(intel->intelScreen->deviceID)) { - urb_size = 32 * 1024; - max_urb_entry = 128; - } else { - urb_size = 64 * 1024; - max_urb_entry = 256; - } - - brw->urb.nr_vs_entries = max_urb_entry; - brw->urb.nr_gs_entries = max_urb_entry; + int nr_vs_entries; /* CACHE_NEW_VS_PROG */ brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1); - if (2 * brw->urb.vs_size > urb_size) - brw->urb.nr_vs_entries = brw->urb.nr_gs_entries = - (urb_size ) / (2 * brw->urb.vs_size); + /* Calculate how many VS URB entries fit in the total URB size */ + nr_vs_entries = (brw->urb.size * 1024) / (brw->urb.vs_size * 128); + + if (nr_vs_entries > brw->urb.max_vs_handles) + nr_vs_entries = brw->urb.max_vs_handles; + + /* According to volume 2a, nr_vs_entries must be a multiple of 4. */ + brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 4); + + /* Since we currently don't support Geometry Shaders, we always put the + * GS unit in passthrough mode and don't allocate it any URB space. + */ + brw->urb.nr_gs_entries = 0; + brw->urb.gs_size = 1; /* Incorrect, but with 0 GS entries it doesn't matter. */ } static void @@ -61,6 +60,7 @@ upload_urb(struct brw_context *brw) { struct intel_context *intel = &brw->intel; + assert(brw->urb.nr_vs_entries >= 24); assert(brw->urb.nr_vs_entries % 4 == 0); assert(brw->urb.nr_gs_entries % 4 == 0); /* GS requirement */ @@ -70,7 +70,7 @@ upload_urb(struct brw_context *brw) OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2)); OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) | ((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT)); - OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) | + OUT_BATCH(((brw->urb.gs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) | ((brw->urb.nr_gs_entries) << GEN6_URB_GS_ENTRIES_SHIFT)); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index ce0b8ea..a10cec3 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -137,7 +137,7 @@ upload_vs_state(struct brw_context *brw) (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); - OUT_BATCH(((60 - 1) << GEN6_VS_MAX_THREADS_SHIFT) | /* max 60 threads for gen6 */ + OUT_BATCH(((brw->vs_max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT) | GEN6_VS_STATISTICS_ENABLE | GEN6_VS_ENABLE); ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 78901ec..8215cb1 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -144,7 +144,7 @@ upload_wm_state(struct brw_context *brw) dw4 |= (brw->wm.prog_data->first_curbe_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0); - dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT; + dw5 |= (brw->wm_max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT; /* CACHE_NEW_WM_PROG */ if (brw->wm.prog_data->dispatch_width == 8) @@ -184,7 +184,12 @@ upload_wm_state(struct brw_context *brw) OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); OUT_BATCH(dw2); - OUT_BATCH(0); /* scratch space base offset */ + if (brw->wm.prog_data->total_scratch) { + OUT_RELOC(brw->wm.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + ffs(brw->wm.prog_data->total_scratch) - 11); + } else { + OUT_BATCH(0); + } OUT_BATCH(dw4); OUT_BATCH(dw5); OUT_BATCH(dw6); |