summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r--src/mesa/drivers/dri/i965/brw_cc.c8
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c18
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h36
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h12
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw.c65
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw_upload.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.c27
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c120
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp572
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h32
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp8
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs.c52
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs.h5
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs_emit.c32
-rw-r--r--src/mesa/drivers/dri/i965/brw_misc_state.c9
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.c5
-rw-r--r--src/mesa/drivers/dri/i965/brw_sf_state.c273
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_dump.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_structs.h44
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c9
-rw-r--r--src/mesa/drivers/dri/i965/brw_vtbl.c10
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.c27
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_sampler_state.c34
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_state.c20
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c41
-rw-r--r--src/mesa/drivers/dri/i965/gen6_cc.c8
-rw-r--r--src/mesa/drivers/dri/i965/gen6_gs_state.c48
-rw-r--r--src/mesa/drivers/dri/i965/gen6_urb.c34
-rw-r--r--src/mesa/drivers/dri/i965/gen6_vs_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/gen6_wm_state.c9
35 files changed, 778 insertions, 805 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c
index 412d82a..74a66af 100644
--- a/src/mesa/drivers/dri/i965/brw_cc.c
+++ b/src/mesa/drivers/dri/i965/brw_cc.c
@@ -238,10 +238,10 @@ static void upload_blend_constant_color(struct brw_context *brw)
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_BLEND_CONSTANT_COLOR << 16 | (5-2));
- OUT_BATCH_F(ctx->Color.BlendColor[0]);
- OUT_BATCH_F(ctx->Color.BlendColor[1]);
- OUT_BATCH_F(ctx->Color.BlendColor[2]);
- OUT_BATCH_F(ctx->Color.BlendColor[3]);
+ OUT_BATCH_F(ctx->Color.BlendColorUnclamped[0]);
+ OUT_BATCH_F(ctx->Color.BlendColorUnclamped[1]);
+ OUT_BATCH_F(ctx->Color.BlendColorUnclamped[2]);
+ OUT_BATCH_F(ctx->Color.BlendColorUnclamped[3]);
CACHED_BATCH();
}
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 9483ec6..230d326 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -182,9 +182,21 @@ GLboolean brwCreateContext( int api,
/* WM maximum threads is number of EUs times number of threads per EU. */
if (intel->gen >= 6) {
- brw->urb.size = 1024;
- brw->vs_max_threads = 60;
- brw->wm_max_threads = 80;
+ if (IS_GT2(intel->intelScreen->deviceID)) {
+ /* This could possibly be 80, but is supposed to require
+ * disabling of WIZ hashing (bit 6 of GT_MODE, 0x20d0) and a
+ * GPU reset to change.
+ */
+ brw->wm_max_threads = 40;
+ brw->vs_max_threads = 60;
+ brw->urb.size = 64; /* volume 5c.5 section 5.1 */
+ brw->urb.max_vs_handles = 128; /* volume 2a (see 3DSTATE_URB) */
+ } else {
+ brw->wm_max_threads = 40;
+ brw->vs_max_threads = 24;
+ brw->urb.size = 32; /* volume 5c.5 section 5.1 */
+ brw->urb.max_vs_handles = 256; /* volume 2a (see 3DSTATE_URB) */
+ }
} else if (intel->gen == 5) {
brw->urb.size = 1024;
brw->vs_max_threads = 72;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 7b0551a..1daa49a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -139,7 +139,7 @@ struct brw_context;
* by any 3D rendering.
*/
#define BRW_NEW_BATCH 0x10000
-/** brw->depth_region updated */
+/** \see brw.state.depth_region */
#define BRW_NEW_DEPTH_BUFFER 0x20000
#define BRW_NEW_NR_WM_SURFACES 0x40000
#define BRW_NEW_NR_VS_SURFACES 0x80000
@@ -464,8 +464,27 @@ struct brw_context
struct {
struct brw_state_flags dirty;
+ /**
+ * \name Cached region pointers
+ *
+ * When the draw buffer is updated, often the depth buffer is not
+ * changed. Caching the pointer to the buffer's region allows us to
+ * detect when the buffer has in fact changed, and allows us to avoid
+ * updating the buffer's GPU state when it has not.
+ *
+ * The original of each cached pointer is an instance of
+ * \c intel_renderbuffer.region.
+ *
+ * \see brw_set_draw_region()
+ *
+ * \{
+ */
+
+ /** \see struct brw_tracked_state brw_depthbuffer */
struct intel_region *depth_region;
+ /** \} */
+
/**
* List of buffers accumulated in brw_validate_state to receive
* drm_intel_bo_check_aperture treatment before exec, so we can
@@ -549,18 +568,21 @@ struct brw_context
GLboolean constrained;
+ GLuint max_vs_handles; /* Maximum number of VS handles */
+ GLuint max_gs_handles; /* Maximum number of GS handles */
+
GLuint nr_vs_entries;
GLuint nr_gs_entries;
GLuint nr_clip_entries;
GLuint nr_sf_entries;
GLuint nr_cs_entries;
- /* gen6 */
+ /* gen6:
+ * The length of each URB entry owned by the VS (or GS), as
+ * a number of 1024-bit (128-byte) rows. Should be >= 1.
+ */
GLuint vs_size;
-/* GLuint gs_size; */
-/* GLuint clip_size; */
-/* GLuint sf_size; */
-/* GLuint cs_size; */
+ GLuint gs_size;
GLuint vs_start;
GLuint gs_start;
@@ -639,7 +661,9 @@ struct brw_context
drm_intel_bo *prog_bo;
drm_intel_bo *state_bo;
+ uint32_t state_offset;
drm_intel_bo *vp_bo;
+ uint32_t vp_offset;
} sf;
struct {
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 6c61aef..effcb6c 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -43,6 +43,12 @@
#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00
#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01
+#define CMD_3D_PRIM 0x7b00 /* 3DPRIMITIVE */
+/* DW0 */
+# define GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT 10
+# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15)
+# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 15)
+
#define _3DPRIM_POINTLIST 0x01
#define _3DPRIM_LINELIST 0x02
#define _3DPRIM_LINESTRIP 0x03
@@ -65,9 +71,6 @@
#define _3DPRIM_LINESTRIP_CONT_BF 0x14
#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
-#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
-#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
-
#define BRW_ANISORATIO_2 0
#define BRW_ANISORATIO_4 1
#define BRW_ANISORATIO_6 2
@@ -147,6 +150,7 @@
#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
#define BRW_DEPTHFORMAT_D32_FLOAT 1
#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2
+#define BRW_DEPTHFORMAT_D24_UNORM_X8_UINT 3 /* GEN5 */
#define BRW_DEPTHFORMAT_D16_UNORM 5
#define BRW_FLOATING_POINT_IEEE_754 0
@@ -1131,8 +1135,6 @@
#define CMD_PIPE_CONTROL 0x7a00
-#define CMD_3D_PRIM 0x7b00
-
#define CMD_MI_FLUSH 0x0200
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index f5abe02..2db70c5 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -129,30 +129,31 @@ static void brw_emit_prim(struct brw_context *brw,
const struct _mesa_prim *prim,
uint32_t hw_prim)
{
- struct brw_3d_primitive prim_packet;
struct intel_context *intel = &brw->intel;
+ int verts_per_instance;
+ int vertex_access_type;
+ int start_vertex_location;
+ int base_vertex_location;
DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
prim->start, prim->count);
- prim_packet.header.opcode = CMD_3D_PRIM;
- prim_packet.header.length = sizeof(prim_packet)/4 - 2;
- prim_packet.header.pad = 0;
- prim_packet.header.topology = hw_prim;
- prim_packet.header.indexed = prim->indexed;
-
- prim_packet.verts_per_instance = trim(prim->mode, prim->count);
- prim_packet.start_vert_location = prim->start;
- if (prim->indexed)
- prim_packet.start_vert_location += brw->ib.start_vertex_offset;
- else
- prim_packet.start_vert_location += brw->vb.start_vertex_bias;
- prim_packet.instance_count = 1;
- prim_packet.start_instance_location = 0;
- prim_packet.base_vert_location = prim->basevertex;
- if (prim->indexed)
- prim_packet.base_vert_location += brw->vb.start_vertex_bias;
+ start_vertex_location = prim->start;
+ base_vertex_location = prim->basevertex;
+ if (prim->indexed) {
+ vertex_access_type = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
+ start_vertex_location += brw->ib.start_vertex_offset;
+ base_vertex_location += brw->vb.start_vertex_bias;
+ } else {
+ vertex_access_type = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
+ start_vertex_location += brw->vb.start_vertex_bias;
+ }
+
+ verts_per_instance = trim(prim->mode, prim->count);
+ /* If nothing to emit, just return. */
+ if (verts_per_instance == 0)
+ return;
/* If we're set to always flush, do it before and after the primitive emit.
* We want to catch both missed flushes that hurt instruction/state cache
@@ -162,10 +163,18 @@ static void brw_emit_prim(struct brw_context *brw,
if (intel->always_flush_cache) {
intel_batchbuffer_emit_mi_flush(intel);
}
- if (prim_packet.verts_per_instance) {
- intel_batchbuffer_data(&brw->intel, &prim_packet,
- sizeof(prim_packet), false);
- }
+
+ BEGIN_BATCH(6);
+ OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
+ hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
+ vertex_access_type);
+ OUT_BATCH(verts_per_instance);
+ OUT_BATCH(start_vertex_location);
+ OUT_BATCH(1); // instance count
+ OUT_BATCH(0); // start instance location
+ OUT_BATCH(base_vertex_location);
+ ADVANCE_BATCH();
+
if (intel->always_flush_cache) {
intel_batchbuffer_emit_mi_flush(intel);
}
@@ -271,20 +280,20 @@ static GLboolean check_fallbacks( struct brw_context *brw,
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
if (texUnit->Enabled) {
if (texUnit->Enabled & TEXTURE_1D_BIT) {
- if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) {
+ if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->Sampler.WrapS == GL_CLAMP) {
return GL_TRUE;
}
}
if (texUnit->Enabled & TEXTURE_2D_BIT) {
- if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP ||
- texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) {
+ if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->Sampler.WrapS == GL_CLAMP ||
+ texUnit->CurrentTex[TEXTURE_2D_INDEX]->Sampler.WrapT == GL_CLAMP) {
return GL_TRUE;
}
}
if (texUnit->Enabled & TEXTURE_3D_BIT) {
- if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP ||
- texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP ||
- texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) {
+ if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->Sampler.WrapS == GL_CLAMP ||
+ texUnit->CurrentTex[TEXTURE_3D_INDEX]->Sampler.WrapT == GL_CLAMP ||
+ texUnit->CurrentTex[TEXTURE_3D_INDEX]->Sampler.WrapR == GL_CLAMP) {
return GL_TRUE;
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index f1d0069..9389eb6 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -575,7 +575,7 @@ static void brw_emit_vertices(struct brw_context *brw)
if (intel->gen >= 5) {
OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1);
} else
- OUT_BATCH(buffer->bo->size / buffer->stride);
+ OUT_BATCH(0);
OUT_BATCH(0); /* Instance data step rate */
brw->vb.current_buffers[i].handle = buffer->bo->handle;
diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c
index 3b5c4c0..7e63482 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.c
+++ b/src/mesa/drivers/dri/i965/brw_eu.c
@@ -34,6 +34,28 @@
#include "brw_defines.h"
#include "brw_eu.h"
+/* Returns the corresponding conditional mod for swapping src0 and
+ * src1 in e.g. CMP.
+ */
+uint32_t
+brw_swap_cmod(uint32_t cmod)
+{
+ switch (cmod) {
+ case BRW_CONDITIONAL_Z:
+ case BRW_CONDITIONAL_NZ:
+ return cmod;
+ case BRW_CONDITIONAL_G:
+ return BRW_CONDITIONAL_LE;
+ case BRW_CONDITIONAL_GE:
+ return BRW_CONDITIONAL_L;
+ case BRW_CONDITIONAL_L:
+ return BRW_CONDITIONAL_GE;
+ case BRW_CONDITIONAL_LE:
+ return BRW_CONDITIONAL_G;
+ default:
+ return ~0;
+ }
+}
/* How does predicate control work when execution_size != 8? Do I
@@ -60,6 +82,11 @@ void brw_set_predicate_control( struct brw_compile *p, GLuint pc )
p->current->header.predicate_control = pc;
}
+void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse)
+{
+ p->current->header.predicate_inverse = predicate_inverse;
+}
+
void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional )
{
p->current->header.destreg__conditionalmod = conditional;
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 2d2ed9d..718b380 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -772,6 +772,7 @@ void brw_set_access_mode( struct brw_compile *p, GLuint access_mode );
void brw_set_compression_control( struct brw_compile *p, GLboolean control );
void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value );
void brw_set_predicate_control( struct brw_compile *p, GLuint pc );
+void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse);
void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
void brw_set_acc_write_control(struct brw_compile *p, GLuint value);
@@ -1017,6 +1018,8 @@ void brw_set_src1( struct brw_instruction *insn,
void brw_set_uip_jip(struct brw_compile *p);
+uint32_t brw_swap_cmod(uint32_t cmod);
+
/* brw_optimize.c */
void brw_optimize(struct brw_compile *p);
void brw_remove_duplicate_mrf_moves(struct brw_compile *p);
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 21ce92c..71485cd 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -52,6 +52,34 @@ static void guess_execution_size(struct brw_compile *p,
}
+/**
+ * Prior to Sandybridge, the SEND instruction accepted non-MRF source
+ * registers, implicitly moving the operand to a message register.
+ *
+ * On Sandybridge, this is no longer the case. This function performs the
+ * explicit move; it should be called before emitting a SEND instruction.
+ */
+static void
+gen6_resolve_implied_move(struct brw_compile *p,
+ struct brw_reg *src,
+ GLuint msg_reg_nr)
+{
+ struct intel_context *intel = &p->brw->intel;
+ if (intel->gen != 6)
+ return;
+
+ if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
+ retype(*src, BRW_REGISTER_TYPE_UD));
+ brw_pop_insn_state(p);
+ }
+ *src = brw_message_reg(msg_reg_nr);
+}
+
+
static void brw_set_dest(struct brw_compile *p,
struct brw_instruction *insn,
struct brw_reg dest)
@@ -468,10 +496,9 @@ static void brw_set_dp_write_message( struct brw_context *brw,
insn->bits3.dp_render_cache.response_length = response_length;
insn->bits3.dp_render_cache.msg_length = msg_length;
insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
+
+ /* We always use the render cache for write messages */
insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
- /* XXX really need below? */
- insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
- insn->bits2.send_gen5.end_of_thread = end_of_thread;
} else if (intel->gen == 5) {
insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
insn->bits3.dp_write_gen5.msg_control = msg_control;
@@ -511,6 +538,13 @@ brw_set_dp_read_message(struct brw_context *brw,
brw_set_src1(insn, brw_imm_d(0));
if (intel->gen >= 6) {
+ uint32_t target_function;
+
+ if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE)
+ target_function = BRW_MESSAGE_TARGET_DATAPORT_READ; /* data cache */
+ else
+ target_function = BRW_MESSAGE_TARGET_DATAPORT_WRITE; /* render cache */
+
insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
insn->bits3.dp_render_cache.msg_control = msg_control;
insn->bits3.dp_render_cache.pixel_scoreboard_clear = 0;
@@ -520,10 +554,7 @@ brw_set_dp_read_message(struct brw_context *brw,
insn->bits3.dp_render_cache.response_length = response_length;
insn->bits3.dp_render_cache.msg_length = msg_length;
insn->bits3.dp_render_cache.end_of_thread = 0;
- insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_READ;
- /* XXX really need below? */
- insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
- insn->bits2.send_gen5.end_of_thread = 0;
+ insn->header.destreg__conditionalmod = target_function;
} else if (intel->gen == 5) {
insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
insn->bits3.dp_read_gen5.msg_control = msg_control;
@@ -1458,9 +1489,12 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
GLuint offset)
{
struct intel_context *intel = &p->brw->intel;
- uint32_t msg_control;
+ uint32_t msg_control, msg_type;
int mlen;
+ if (intel->gen >= 6)
+ offset /= 16;
+
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
if (num_regs == 1) {
@@ -1526,13 +1560,22 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
}
brw_set_dest(p, insn, dest);
- brw_set_src0(insn, brw_null_reg());
+ if (intel->gen >= 6) {
+ brw_set_src0(insn, mrf);
+ } else {
+ brw_set_src0(insn, brw_null_reg());
+ }
+
+ if (intel->gen >= 6)
+ msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
+ else
+ msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
brw_set_dp_write_message(p->brw,
insn,
255, /* binding table index (255=stateless) */
msg_control,
- BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
+ msg_type,
mlen,
GL_TRUE, /* header_present */
0, /* pixel scoreboard */
@@ -1557,9 +1600,13 @@ brw_oword_block_read_scratch(struct brw_compile *p,
int num_regs,
GLuint offset)
{
+ struct intel_context *intel = &p->brw->intel;
uint32_t msg_control;
int rlen;
+ if (intel->gen >= 6)
+ offset /= 16;
+
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
dest = retype(dest, BRW_REGISTER_TYPE_UW);
@@ -1596,14 +1643,18 @@ brw_oword_block_read_scratch(struct brw_compile *p,
insn->header.destreg__conditionalmod = mrf.nr;
brw_set_dest(p, insn, dest); /* UW? */
- brw_set_src0(insn, brw_null_reg());
+ if (intel->gen >= 6) {
+ brw_set_src0(insn, mrf);
+ } else {
+ brw_set_src0(insn, brw_null_reg());
+ }
brw_set_dp_read_message(p->brw,
insn,
255, /* binding table index (255=stateless) */
msg_control,
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
- 1, /* target cache (render/scratch) */
+ BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
1, /* msg_length */
rlen);
}
@@ -1771,6 +1822,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
GLuint bind_table_index)
{
struct intel_context *intel = &p->brw->intel;
+ struct brw_reg src = brw_vec8_grf(0, 0);
int msg_type;
/* Setup MRF[1] with offset into const buffer */
@@ -1787,6 +1839,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
addr_reg, brw_imm_d(offset));
brw_pop_insn_state(p);
+ gen6_resolve_implied_move(p, &src, 0);
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
insn->header.predicate_control = BRW_PREDICATE_NONE;
@@ -1795,7 +1848,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
insn->header.mask_control = BRW_MASK_DISABLE;
brw_set_dest(p, insn, dest);
- brw_set_src0(insn, brw_vec8_grf(0, 0));
+ brw_set_src0(insn, src);
if (intel->gen == 6)
msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
@@ -1809,7 +1862,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
bind_table_index,
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
msg_type,
- 0, /* source cache = data cache */
+ BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2, /* msg_length */
1); /* response_length */
}
@@ -1966,20 +2019,7 @@ void brw_SAMPLE(struct brw_compile *p,
{
struct brw_instruction *insn;
- /* Sandybridge doesn't have the implied move for SENDs,
- * and the first message register index comes from src0.
- */
- if (intel->gen >= 6) {
- if (src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
- src0.nr != BRW_ARF_NULL) {
- brw_push_insn_state(p);
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_MOV(p, retype(brw_message_reg(msg_reg_nr), src0.type), src0);
- brw_pop_insn_state(p);
- }
- src0 = brw_message_reg(msg_reg_nr);
- }
+ gen6_resolve_implied_move(p, &src0, msg_reg_nr);
insn = next_insn(p, BRW_OPCODE_SEND);
insn->header.predicate_control = 0; /* XXX */
@@ -2034,17 +2074,7 @@ void brw_urb_WRITE(struct brw_compile *p,
struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
- /* Sandybridge doesn't have the implied move for SENDs,
- * and the first message register index comes from src0.
- */
- if (intel->gen >= 6) {
- brw_push_insn_state(p);
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
- retype(src0, BRW_REGISTER_TYPE_UD));
- brw_pop_insn_state(p);
- src0 = brw_message_reg(msg_reg_nr);
- }
+ gen6_resolve_implied_move(p, &src0, msg_reg_nr);
insn = next_insn(p, BRW_OPCODE_SEND);
@@ -2154,17 +2184,7 @@ void brw_ff_sync(struct brw_compile *p,
struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
- /* Sandybridge doesn't have the implied move for SENDs,
- * and the first message register index comes from src0.
- */
- if (intel->gen >= 6) {
- brw_push_insn_state(p);
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
- retype(src0, BRW_REGISTER_TYPE_UD));
- brw_pop_insn_state(p);
- src0 = brw_message_reg(msg_reg_nr);
- }
+ gen6_resolve_implied_move(p, &src0, msg_reg_nr);
insn = next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, insn, dest);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 8b3f5ad..5426925 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -177,6 +177,23 @@ type_size(const struct glsl_type *type)
}
}
+void
+fs_visitor::fail(const char *format, ...)
+{
+ if (!failed) {
+ failed = true;
+
+ if (INTEL_DEBUG & DEBUG_WM) {
+ fprintf(stderr, "FS compile failed: ");
+
+ va_list va;
+ va_start(va, format);
+ vfprintf(stderr, format, va);
+ va_end(va);
+ }
+ }
+}
+
/**
* Returns how many MRFs an FS opcode will write over.
*
@@ -382,60 +399,32 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
void
fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
{
- const struct gl_builtin_uniform_desc *statevar = NULL;
-
- for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) {
- statevar = &_mesa_builtin_uniform_desc[i];
- if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0)
- break;
- }
-
- if (!statevar->name) {
- this->fail = true;
- printf("Failed to find builtin uniform `%s'\n", ir->name);
- return;
- }
-
- int array_count;
- if (ir->type->is_array()) {
- array_count = ir->type->length;
- } else {
- array_count = 1;
- }
+ const ir_state_slot *const slots = ir->state_slots;
+ assert(ir->state_slots != NULL);
- for (int a = 0; a < array_count; a++) {
- for (unsigned int i = 0; i < statevar->num_elements; i++) {
- struct gl_builtin_uniform_element *element = &statevar->elements[i];
- int tokens[STATE_LENGTH];
-
- memcpy(tokens, element->tokens, sizeof(element->tokens));
- if (ir->type->is_array()) {
- tokens[1] = a;
- }
-
- /* This state reference has already been setup by ir_to_mesa,
- * but we'll get the same index back here.
- */
- int index = _mesa_add_state_reference(this->fp->Base.Parameters,
- (gl_state_index *)tokens);
+ for (unsigned int i = 0; i < ir->num_state_slots; i++) {
+ /* This state reference has already been setup by ir_to_mesa, but we'll
+ * get the same index back here.
+ */
+ int index = _mesa_add_state_reference(this->fp->Base.Parameters,
+ (gl_state_index *)slots[i].tokens);
- /* Add each of the unique swizzles of the element as a
- * parameter. This'll end up matching the expected layout of
- * the array/matrix/structure we're trying to fill in.
- */
- int last_swiz = -1;
- for (unsigned int i = 0; i < 4; i++) {
- int swiz = GET_SWZ(element->swizzle, i);
- if (swiz == last_swiz)
- break;
- last_swiz = swiz;
+ /* Add each of the unique swizzles of the element as a parameter.
+ * This'll end up matching the expected layout of the
+ * array/matrix/structure we're trying to fill in.
+ */
+ int last_swiz = -1;
+ for (unsigned int j = 0; j < 4; j++) {
+ int swiz = GET_SWZ(slots[i].swizzle, j);
+ if (swiz == last_swiz)
+ break;
+ last_swiz = swiz;
- c->prog_data.param_convert[c->prog_data.nr_params] =
- PARAM_NO_CONVERT;
- this->param_index[c->prog_data.nr_params] = index;
- this->param_offset[c->prog_data.nr_params] = swiz;
- c->prog_data.nr_params++;
- }
+ c->prog_data.param_convert[c->prog_data.nr_params] =
+ PARAM_NO_CONVERT;
+ this->param_index[c->prog_data.nr_params] = index;
+ this->param_offset[c->prog_data.nr_params] = swiz;
+ c->prog_data.nr_params++;
}
}
}
@@ -451,15 +440,15 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
/* gl_FragCoord.x */
if (ir->pixel_center_integer) {
- emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x));
+ emit(BRW_OPCODE_MOV, wpos, this->pixel_x);
} else {
- emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f)));
+ emit(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f));
}
wpos.reg_offset++;
/* gl_FragCoord.y */
if (!flip && ir->pixel_center_integer) {
- emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y));
+ emit(BRW_OPCODE_MOV, wpos, this->pixel_y);
} else {
fs_reg pixel_y = this->pixel_y;
float offset = (ir->pixel_center_integer ? 0.0 : 0.5);
@@ -469,22 +458,22 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
offset += c->key.drawable_height - 1.0;
}
- emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset)));
+ emit(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset));
}
wpos.reg_offset++;
/* gl_FragCoord.z */
if (intel->gen >= 6) {
- emit(fs_inst(BRW_OPCODE_MOV, wpos,
- fs_reg(brw_vec8_grf(c->source_depth_reg, 0))));
+ emit(BRW_OPCODE_MOV, wpos,
+ fs_reg(brw_vec8_grf(c->source_depth_reg, 0)));
} else {
- emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
- interp_reg(FRAG_ATTRIB_WPOS, 2)));
+ emit(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
+ interp_reg(FRAG_ATTRIB_WPOS, 2));
}
wpos.reg_offset++;
/* gl_FragCoord.w: Already set up in emit_interpolation */
- emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w));
+ emit(BRW_OPCODE_MOV, wpos, this->wpos_w);
return reg;
}
@@ -503,7 +492,7 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
if (ir->type->is_array()) {
array_elements = ir->type->length;
if (array_elements == 0) {
- this->fail = true;
+ fail("dereferenced array '%s' has length 0\n", ir->name);
}
type = ir->type->fields.array;
} else {
@@ -523,37 +512,33 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
continue;
}
- if (c->key.flat_shade && (location == FRAG_ATTRIB_COL0 ||
- location == FRAG_ATTRIB_COL1)) {
+ bool is_gl_Color =
+ location == FRAG_ATTRIB_COL0 || location == FRAG_ATTRIB_COL1;
+
+ if (c->key.flat_shade && is_gl_Color) {
/* Constant interpolation (flat shading) case. The SF has
* handed us defined values in only the constant offset
* field of the setup reg.
*/
- for (unsigned int c = 0; c < type->vector_elements; c++) {
- struct brw_reg interp = interp_reg(location, c);
+ for (unsigned int k = 0; k < type->vector_elements; k++) {
+ struct brw_reg interp = interp_reg(location, k);
interp = suboffset(interp, 3);
- emit(fs_inst(FS_OPCODE_CINTERP, attr, fs_reg(interp)));
+ emit(FS_OPCODE_CINTERP, attr, fs_reg(interp));
attr.reg_offset++;
}
} else {
/* Perspective interpolation case. */
- for (unsigned int c = 0; c < type->vector_elements; c++) {
- struct brw_reg interp = interp_reg(location, c);
- emit(fs_inst(FS_OPCODE_LINTERP,
- attr,
- this->delta_x,
- this->delta_y,
- fs_reg(interp)));
+ for (unsigned int k = 0; k < type->vector_elements; k++) {
+ struct brw_reg interp = interp_reg(location, k);
+ emit(FS_OPCODE_LINTERP, attr,
+ this->delta_x, this->delta_y, fs_reg(interp));
attr.reg_offset++;
}
- if (intel->gen < 6) {
+ if (intel->gen < 6 && !(is_gl_Color && c->key.linear_color)) {
attr.reg_offset -= type->vector_elements;
- for (unsigned int c = 0; c < type->vector_elements; c++) {
- emit(fs_inst(BRW_OPCODE_MUL,
- attr,
- attr,
- this->pixel_w));
+ for (unsigned int k = 0; k < type->vector_elements; k++) {
+ emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w);
attr.reg_offset++;
}
}
@@ -572,28 +557,21 @@ fs_visitor::emit_frontfacing_interpolation(ir_variable *ir)
/* The frontfacing comes in as a bit in the thread payload. */
if (intel->gen >= 6) {
- emit(fs_inst(BRW_OPCODE_ASR,
- *reg,
- fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
- fs_reg(15)));
- emit(fs_inst(BRW_OPCODE_NOT,
- *reg,
- *reg));
- emit(fs_inst(BRW_OPCODE_AND,
- *reg,
- *reg,
- fs_reg(1)));
+ emit(BRW_OPCODE_ASR, *reg,
+ fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
+ fs_reg(15));
+ emit(BRW_OPCODE_NOT, *reg, *reg);
+ emit(BRW_OPCODE_AND, *reg, *reg, fs_reg(1));
} else {
struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
/* bit 31 is "primitive is back face", so checking < (1 << 31) gives
* us front face
*/
- fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP,
- *reg,
- fs_reg(r1_6ud),
- fs_reg(1u << 31)));
+ fs_inst *inst = emit(BRW_OPCODE_CMP, *reg,
+ fs_reg(r1_6ud),
+ fs_reg(1u << 31));
inst->conditional_mod = BRW_CONDITIONAL_L;
- emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)));
+ emit(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u));
}
return reg;
@@ -628,11 +606,11 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
src.abs ||
src.negate)) {
fs_reg expanded = fs_reg(this, glsl_type::float_type);
- emit(fs_inst(BRW_OPCODE_MOV, expanded, src));
+ emit(BRW_OPCODE_MOV, expanded, src);
src = expanded;
}
- fs_inst *inst = emit(fs_inst(opcode, dst, src));
+ fs_inst *inst = emit(opcode, dst, src);
if (intel->gen < 6) {
inst->base_mrf = 2;
@@ -658,20 +636,20 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1)
*/
if (src0.file == UNIFORM || src0.abs || src0.negate) {
fs_reg expanded = fs_reg(this, glsl_type::float_type);
- emit(fs_inst(BRW_OPCODE_MOV, expanded, src0));
+ emit(BRW_OPCODE_MOV, expanded, src0);
src0 = expanded;
}
if (src1.file == UNIFORM || src1.abs || src1.negate) {
fs_reg expanded = fs_reg(this, glsl_type::float_type);
- emit(fs_inst(BRW_OPCODE_MOV, expanded, src1));
+ emit(BRW_OPCODE_MOV, expanded, src1);
src1 = expanded;
}
- inst = emit(fs_inst(opcode, dst, src0, src1));
+ inst = emit(opcode, dst, src0, src1);
} else {
- emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1));
- inst = emit(fs_inst(opcode, dst, src0, reg_null_f));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1);
+ inst = emit(opcode, dst, src0, reg_null_f);
inst->base_mrf = base_mrf;
inst->mlen = 2;
@@ -788,7 +766,7 @@ fs_visitor::try_emit_saturate(ir_expression *ir)
fs_reg src = this->result;
this->result = fs_reg(this, ir->type);
- fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, src));
+ fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src);
inst->saturate = true;
return true;
@@ -834,9 +812,8 @@ fs_visitor::visit(ir_expression *ir)
ir->operands[operand]->accept(this);
if (this->result.file == BAD_FILE) {
ir_print_visitor v;
- printf("Failed to get tree for expression operand:\n");
+ fail("Failed to get tree for expression operand:\n");
ir->operands[operand]->accept(&v);
- this->fail = true;
}
op[operand] = this->result;
@@ -859,7 +836,7 @@ fs_visitor::visit(ir_expression *ir)
/* Note that BRW_OPCODE_NOT is not appropriate here, since it is
* ones complement of the whole register, not just bit 0.
*/
- emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1)));
+ emit(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1));
break;
case ir_unop_neg:
op[0].negate = !op[0].negate;
@@ -873,16 +850,16 @@ fs_visitor::visit(ir_expression *ir)
case ir_unop_sign:
temp = fs_reg(this, ir->type);
- emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)));
+ emit(BRW_OPCODE_MOV, this->result, fs_reg(0.0f));
- inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f)));
+ inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f));
inst->conditional_mod = BRW_CONDITIONAL_G;
- inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)));
+ inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(1.0f));
inst->predicated = true;
- inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f)));
+ inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f));
inst->conditional_mod = BRW_CONDITIONAL_L;
- inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)));
+ inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f));
inst->predicated = true;
break;
@@ -910,21 +887,21 @@ fs_visitor::visit(ir_expression *ir)
break;
case ir_unop_dFdx:
- emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
+ emit(FS_OPCODE_DDX, this->result, op[0]);
break;
case ir_unop_dFdy:
- emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
+ emit(FS_OPCODE_DDY, this->result, op[0]);
break;
case ir_binop_add:
- emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
+ emit(BRW_OPCODE_ADD, this->result, op[0], op[1]);
break;
case ir_binop_sub:
assert(!"not reached: should be handled by ir_sub_to_add_neg");
break;
case ir_binop_mul:
- emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
+ emit(BRW_OPCODE_MUL, this->result, op[0], op[1]);
break;
case ir_binop_div:
assert(!"not reached: should be handled by ir_div_to_mul_rcp");
@@ -946,21 +923,21 @@ fs_visitor::visit(ir_expression *ir)
if (intel->gen < 5)
temp.type = op[0].type;
- inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], op[1]));
+ inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
- emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
+ emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1));
break;
case ir_binop_logic_xor:
- emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
+ emit(BRW_OPCODE_XOR, this->result, op[0], op[1]);
break;
case ir_binop_logic_or:
- emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
+ emit(BRW_OPCODE_OR, this->result, op[0], op[1]);
break;
case ir_binop_logic_and:
- emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
+ emit(BRW_OPCODE_AND, this->result, op[0], op[1]);
break;
case ir_binop_dot:
@@ -988,7 +965,7 @@ fs_visitor::visit(ir_expression *ir)
case ir_unop_b2f:
case ir_unop_b2i:
case ir_unop_f2i:
- emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
+ emit(BRW_OPCODE_MOV, this->result, op[0]);
break;
case ir_unop_f2b:
case ir_unop_i2b:
@@ -997,42 +974,41 @@ fs_visitor::visit(ir_expression *ir)
if (intel->gen < 5)
temp.type = op[0].type;
- inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f)));
+ inst = emit(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
- inst = emit(fs_inst(BRW_OPCODE_AND, this->result,
- this->result, fs_reg(1)));
+ inst = emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(1));
break;
case ir_unop_trunc:
- emit(fs_inst(BRW_OPCODE_RNDZ, this->result, op[0]));
+ emit(BRW_OPCODE_RNDZ, this->result, op[0]);
break;
case ir_unop_ceil:
op[0].negate = !op[0].negate;
- inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
+ inst = emit(BRW_OPCODE_RNDD, this->result, op[0]);
this->result.negate = true;
break;
case ir_unop_floor:
- inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
+ inst = emit(BRW_OPCODE_RNDD, this->result, op[0]);
break;
case ir_unop_fract:
- inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
+ inst = emit(BRW_OPCODE_FRC, this->result, op[0]);
break;
case ir_unop_round_even:
- emit(fs_inst(BRW_OPCODE_RNDE, this->result, op[0]));
+ emit(BRW_OPCODE_RNDE, this->result, op[0]);
break;
case ir_binop_min:
- inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
+ inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]);
inst->conditional_mod = BRW_CONDITIONAL_L;
- inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
+ inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
inst->predicated = true;
break;
case ir_binop_max:
- inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
+ inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]);
inst->conditional_mod = BRW_CONDITIONAL_G;
- inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
+ inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
inst->predicated = true;
break;
@@ -1041,16 +1017,16 @@ fs_visitor::visit(ir_expression *ir)
break;
case ir_unop_bit_not:
- inst = emit(fs_inst(BRW_OPCODE_NOT, this->result, op[0]));
+ inst = emit(BRW_OPCODE_NOT, this->result, op[0]);
break;
case ir_binop_bit_and:
- inst = emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
+ inst = emit(BRW_OPCODE_AND, this->result, op[0], op[1]);
break;
case ir_binop_bit_xor:
- inst = emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
+ inst = emit(BRW_OPCODE_XOR, this->result, op[0], op[1]);
break;
case ir_binop_bit_or:
- inst = emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
+ inst = emit(BRW_OPCODE_OR, this->result, op[0], op[1]);
break;
case ir_unop_u2f:
@@ -1074,7 +1050,7 @@ fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
l.type = brw_type_for_base_type(type);
r.type = brw_type_for_base_type(type);
- fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
+ fs_inst *inst = emit(BRW_OPCODE_MOV, l, r);
inst->predicated = predicated;
l.reg_offset++;
@@ -1127,7 +1103,7 @@ fs_visitor::visit(ir_assignment *ir)
ir->lhs->type->is_vector()) {
for (int i = 0; i < ir->lhs->type->vector_elements; i++) {
if (ir->write_mask & (1 << i)) {
- inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
+ inst = emit(BRW_OPCODE_MOV, l, r);
if (ir->condition)
inst->predicated = true;
r.reg_offset++;
@@ -1152,8 +1128,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
if (ir->shadow_comparitor) {
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
- coordinate));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate);
coordinate.reg_offset++;
}
/* gen4's SIMD8 sampler always has the slots for u,v,r present. */
@@ -1163,29 +1138,25 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
/* There's no plain shadow compare message, so we use shadow
* compare with a bias of 0.0.
*/
- emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
- fs_reg(0.0f)));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f));
mlen++;
} else if (ir->op == ir_txb) {
ir->lod_info.bias->accept(this);
- emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
- this->result));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
mlen++;
} else {
assert(ir->op == ir_txl);
ir->lod_info.lod->accept(this);
- emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
- this->result));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
mlen++;
}
ir->shadow_comparitor->accept(this);
- emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
mlen++;
} else if (ir->op == ir_tex) {
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
- coordinate));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate);
coordinate.reg_offset++;
}
/* gen4's SIMD8 sampler always has the slots for u,v,r present. */
@@ -1199,8 +1170,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
assert(ir->op == ir_txb || ir->op == ir_txl);
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2),
- coordinate));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), coordinate);
coordinate.reg_offset++;
}
@@ -1209,13 +1179,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
if (ir->op == ir_txb) {
ir->lod_info.bias->accept(this);
- emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
- this->result));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
mlen++;
} else {
ir->lod_info.lod->accept(this);
- emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
- this->result));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
mlen++;
}
@@ -1236,16 +1204,16 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
fs_inst *inst = NULL;
switch (ir->op) {
case ir_tex:
- inst = emit(fs_inst(FS_OPCODE_TEX, dst));
+ inst = emit(FS_OPCODE_TEX, dst);
break;
case ir_txb:
- inst = emit(fs_inst(FS_OPCODE_TXB, dst));
+ inst = emit(FS_OPCODE_TXB, dst);
break;
case ir_txl:
- inst = emit(fs_inst(FS_OPCODE_TXL, dst));
+ inst = emit(FS_OPCODE_TXL, dst);
break;
case ir_txd:
- inst = emit(fs_inst(FS_OPCODE_TXD, dst));
+ inst = emit(FS_OPCODE_TXD, dst);
break;
case ir_txf:
assert(!"GLSL 1.30 features unsupported");
@@ -1256,7 +1224,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
if (simd16) {
for (int i = 0; i < 4; i++) {
- emit(fs_inst(BRW_OPCODE_MOV, orig_dst, dst));
+ emit(BRW_OPCODE_MOV, orig_dst, dst);
orig_dst.reg_offset++;
dst.reg_offset += 2;
}
@@ -1280,8 +1248,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
int base_mrf = 1;
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
- coordinate));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate);
coordinate.reg_offset++;
}
mlen += ir->coordinate->type->vector_elements;
@@ -1290,30 +1257,30 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
mlen = MAX2(mlen, 5);
ir->shadow_comparitor->accept(this);
- emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
mlen++;
}
fs_inst *inst = NULL;
switch (ir->op) {
case ir_tex:
- inst = emit(fs_inst(FS_OPCODE_TEX, dst));
+ inst = emit(FS_OPCODE_TEX, dst);
break;
case ir_txb:
ir->lod_info.bias->accept(this);
mlen = MAX2(mlen, 5);
- emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
mlen++;
- inst = emit(fs_inst(FS_OPCODE_TXB, dst));
+ inst = emit(FS_OPCODE_TXB, dst);
break;
case ir_txl:
ir->lod_info.lod->accept(this);
mlen = MAX2(mlen, 5);
- emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
mlen++;
- inst = emit(fs_inst(FS_OPCODE_TXL, dst));
+ inst = emit(FS_OPCODE_TXL, dst);
break;
case ir_txd:
case ir_txf:
@@ -1356,14 +1323,14 @@ fs_visitor::visit(ir_texture *ir)
}
/* Explicitly set up the message header by copying g0 to msg reg m1. */
- emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, 1, BRW_REGISTER_TYPE_UD),
- fs_reg(GRF, 0, BRW_REGISTER_TYPE_UD)));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, 1, BRW_REGISTER_TYPE_UD),
+ fs_reg(GRF, 0, BRW_REGISTER_TYPE_UD));
/* Then set the offset bits in DWord 2 of the message header. */
- emit(fs_inst(BRW_OPCODE_MOV,
- fs_reg(retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1, 2),
- BRW_REGISTER_TYPE_UD)),
- fs_reg(brw_imm_uw(offset_bits))));
+ emit(BRW_OPCODE_MOV,
+ fs_reg(retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1, 2),
+ BRW_REGISTER_TYPE_UD)),
+ fs_reg(brw_imm_uw(offset_bits)));
}
/* Should be lowered by do_lower_texture_projection */
@@ -1409,10 +1376,10 @@ fs_visitor::visit(ir_texture *ir)
fs_reg src = coordinate;
coordinate = dst;
- emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_x));
+ emit(BRW_OPCODE_MUL, dst, src, scale_x);
dst.reg_offset++;
src.reg_offset++;
- emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_y));
+ emit(BRW_OPCODE_MUL, dst, src, scale_y);
}
/* Writemasking doesn't eliminate channels on SIMD8 texture
@@ -1453,13 +1420,13 @@ fs_visitor::visit(ir_texture *ir)
l.reg_offset += i;
if (swiz == SWIZZLE_ZERO) {
- emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(0.0f)));
+ emit(BRW_OPCODE_MOV, l, fs_reg(0.0f));
} else if (swiz == SWIZZLE_ONE) {
- emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(1.0f)));
+ emit(BRW_OPCODE_MOV, l, fs_reg(1.0f));
} else {
fs_reg r = dst;
r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
- emit(fs_inst(BRW_OPCODE_MOV, l, r));
+ emit(BRW_OPCODE_MOV, l, r);
}
}
this->result = swizzle_dst;
@@ -1500,7 +1467,7 @@ fs_visitor::visit(ir_swizzle *ir)
}
channel.reg_offset += swiz;
- emit(fs_inst(BRW_OPCODE_MOV, result, channel));
+ emit(BRW_OPCODE_MOV, result, channel);
result.reg_offset++;
}
}
@@ -1512,8 +1479,8 @@ fs_visitor::visit(ir_discard *ir)
assert(ir->condition == NULL); /* FINISHME */
- emit(fs_inst(FS_OPCODE_DISCARD_NOT, temp, reg_null_d));
- emit(fs_inst(FS_OPCODE_DISCARD_AND, reg_null_d, temp));
+ emit(FS_OPCODE_DISCARD_NOT, temp, reg_null_d);
+ emit(FS_OPCODE_DISCARD_AND, reg_null_d, temp);
kill_emitted = true;
}
@@ -1539,7 +1506,7 @@ fs_visitor::visit(ir_constant *ir)
dst_reg.type = src_reg.type;
for (unsigned j = 0; j < size; j++) {
- emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg));
+ emit(BRW_OPCODE_MOV, dst_reg, src_reg);
src_reg.reg_offset++;
dst_reg.reg_offset++;
}
@@ -1554,7 +1521,7 @@ fs_visitor::visit(ir_constant *ir)
dst_reg.type = src_reg.type;
for (unsigned j = 0; j < size; j++) {
- emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg));
+ emit(BRW_OPCODE_MOV, dst_reg, src_reg);
src_reg.reg_offset++;
dst_reg.reg_offset++;
}
@@ -1565,16 +1532,16 @@ fs_visitor::visit(ir_constant *ir)
for (unsigned i = 0; i < size; i++) {
switch (ir->type->base_type) {
case GLSL_TYPE_FLOAT:
- emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i])));
+ emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i]));
break;
case GLSL_TYPE_UINT:
- emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i])));
+ emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i]));
break;
case GLSL_TYPE_INT:
- emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i])));
+ emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i]));
break;
case GLSL_TYPE_BOOL:
- emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i])));
+ emit(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i]));
break;
default:
assert(!"Non-float/uint/int/bool constant");
@@ -1605,40 +1572,39 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
switch (expr->operation) {
case ir_unop_logic_not:
- inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1)));
+ inst = emit(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1));
inst->conditional_mod = BRW_CONDITIONAL_Z;
break;
case ir_binop_logic_xor:
- inst = emit(fs_inst(BRW_OPCODE_XOR, reg_null_d, op[0], op[1]));
+ inst = emit(BRW_OPCODE_XOR, reg_null_d, op[0], op[1]);
inst->conditional_mod = BRW_CONDITIONAL_NZ;
break;
case ir_binop_logic_or:
- inst = emit(fs_inst(BRW_OPCODE_OR, reg_null_d, op[0], op[1]));
+ inst = emit(BRW_OPCODE_OR, reg_null_d, op[0], op[1]);
inst->conditional_mod = BRW_CONDITIONAL_NZ;
break;
case ir_binop_logic_and:
- inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d, op[0], op[1]));
+ inst = emit(BRW_OPCODE_AND, reg_null_d, op[0], op[1]);
inst->conditional_mod = BRW_CONDITIONAL_NZ;
break;
case ir_unop_f2b:
if (intel->gen >= 6) {
- inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d,
- op[0], fs_reg(0.0f)));
+ inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0.0f));
} else {
- inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_f, op[0]));
+ inst = emit(BRW_OPCODE_MOV, reg_null_f, op[0]);
}
inst->conditional_mod = BRW_CONDITIONAL_NZ;
break;
case ir_unop_i2b:
if (intel->gen >= 6) {
- inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0)));
+ inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0));
} else {
- inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, op[0]));
+ inst = emit(BRW_OPCODE_MOV, reg_null_d, op[0]);
}
inst->conditional_mod = BRW_CONDITIONAL_NZ;
break;
@@ -1651,14 +1617,14 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
case ir_binop_all_equal:
case ir_binop_nequal:
case ir_binop_any_nequal:
- inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1]));
+ inst = emit(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1]);
inst->conditional_mod =
brw_conditional_for_comparison(expr->operation);
break;
default:
assert(!"not reached");
- this->fail = true;
+ fail("bad cond code\n");
break;
}
return;
@@ -1667,11 +1633,10 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
ir->accept(this);
if (intel->gen >= 6) {
- fs_inst *inst = emit(fs_inst(BRW_OPCODE_AND, reg_null_d,
- this->result, fs_reg(1)));
+ fs_inst *inst = emit(BRW_OPCODE_AND, reg_null_d, this->result, fs_reg(1));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
} else {
- fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, this->result));
+ fs_inst *inst = emit(BRW_OPCODE_MOV, reg_null_d, this->result);
inst->conditional_mod = BRW_CONDITIONAL_NZ;
}
}
@@ -1700,36 +1665,36 @@ fs_visitor::emit_if_gen6(ir_if *ir)
switch (expr->operation) {
case ir_unop_logic_not:
- inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(0)));
+ inst = emit(BRW_OPCODE_IF, temp, op[0], fs_reg(0));
inst->conditional_mod = BRW_CONDITIONAL_Z;
return;
case ir_binop_logic_xor:
- inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
+ inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]);
inst->conditional_mod = BRW_CONDITIONAL_NZ;
return;
case ir_binop_logic_or:
temp = fs_reg(this, glsl_type::bool_type);
- emit(fs_inst(BRW_OPCODE_OR, temp, op[0], op[1]));
- inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0)));
+ emit(BRW_OPCODE_OR, temp, op[0], op[1]);
+ inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
return;
case ir_binop_logic_and:
temp = fs_reg(this, glsl_type::bool_type);
- emit(fs_inst(BRW_OPCODE_AND, temp, op[0], op[1]));
- inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0)));
+ emit(BRW_OPCODE_AND, temp, op[0], op[1]);
+ inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
return;
case ir_unop_f2b:
- inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0)));
+ inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
return;
case ir_unop_i2b:
- inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0)));
+ inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
return;
@@ -1741,15 +1706,15 @@ fs_visitor::emit_if_gen6(ir_if *ir)
case ir_binop_all_equal:
case ir_binop_nequal:
case ir_binop_any_nequal:
- inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1]));
+ inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]);
inst->conditional_mod =
brw_conditional_for_comparison(expr->operation);
return;
default:
assert(!"not reached");
- inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0)));
+ inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
- this->fail = true;
+ fail("bad condition\n");
return;
}
return;
@@ -1757,7 +1722,7 @@ fs_visitor::emit_if_gen6(ir_if *ir)
ir->condition->accept(this);
- fs_inst *inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0)));
+ fs_inst *inst = emit(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
}
@@ -1776,7 +1741,7 @@ fs_visitor::visit(ir_if *ir)
} else {
emit_bool_to_cond_code(ir->condition);
- inst = emit(fs_inst(BRW_OPCODE_IF));
+ inst = emit(BRW_OPCODE_IF);
inst->predicated = true;
}
@@ -1788,7 +1753,7 @@ fs_visitor::visit(ir_if *ir)
}
if (!ir->else_instructions.is_empty()) {
- emit(fs_inst(BRW_OPCODE_ELSE));
+ emit(BRW_OPCODE_ELSE);
foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
ir_instruction *ir = (ir_instruction *)iter.get();
@@ -1798,7 +1763,7 @@ fs_visitor::visit(ir_if *ir)
}
}
- emit(fs_inst(BRW_OPCODE_ENDIF));
+ emit(BRW_OPCODE_ENDIF);
}
void
@@ -1815,21 +1780,20 @@ fs_visitor::visit(ir_loop *ir)
this->base_ir = ir->from;
ir->from->accept(this);
- emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
+ emit(BRW_OPCODE_MOV, counter, this->result);
}
}
- emit(fs_inst(BRW_OPCODE_DO));
+ emit(BRW_OPCODE_DO);
if (ir->to) {
this->base_ir = ir->to;
ir->to->accept(this);
- fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp,
- counter, this->result));
+ fs_inst *inst = emit(BRW_OPCODE_CMP, reg_null_cmp, counter, this->result);
inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);
- inst = emit(fs_inst(BRW_OPCODE_BREAK));
+ inst = emit(BRW_OPCODE_BREAK);
inst->predicated = true;
}
@@ -1843,10 +1807,10 @@ fs_visitor::visit(ir_loop *ir)
if (ir->increment) {
this->base_ir = ir->increment;
ir->increment->accept(this);
- emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
+ emit(BRW_OPCODE_ADD, counter, counter, this->result);
}
- emit(fs_inst(BRW_OPCODE_WHILE));
+ emit(BRW_OPCODE_WHILE);
}
void
@@ -1854,10 +1818,10 @@ fs_visitor::visit(ir_loop_jump *ir)
{
switch (ir->mode) {
case ir_loop_jump::jump_break:
- emit(fs_inst(BRW_OPCODE_BREAK));
+ emit(BRW_OPCODE_BREAK);
break;
case ir_loop_jump::jump_continue:
- emit(fs_inst(BRW_OPCODE_CONTINUE));
+ emit(BRW_OPCODE_CONTINUE);
break;
}
}
@@ -1923,23 +1887,13 @@ void
fs_visitor::emit_dummy_fs()
{
/* Everyone's favorite color. */
- emit(fs_inst(BRW_OPCODE_MOV,
- fs_reg(MRF, 2),
- fs_reg(1.0f)));
- emit(fs_inst(BRW_OPCODE_MOV,
- fs_reg(MRF, 3),
- fs_reg(0.0f)));
- emit(fs_inst(BRW_OPCODE_MOV,
- fs_reg(MRF, 4),
- fs_reg(1.0f)));
- emit(fs_inst(BRW_OPCODE_MOV,
- fs_reg(MRF, 5),
- fs_reg(0.0f)));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, 2), fs_reg(1.0f));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, 3), fs_reg(0.0f));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, 4), fs_reg(1.0f));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, 5), fs_reg(0.0f));
fs_inst *write;
- write = emit(fs_inst(FS_OPCODE_FB_WRITE,
- fs_reg(0),
- fs_reg(0)));
+ write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0));
write->base_mrf = 0;
}
@@ -1969,14 +1923,14 @@ fs_visitor::emit_interpolation_setup_gen4()
this->pixel_y = fs_reg(this, glsl_type::uint_type);
this->pixel_x.type = BRW_REGISTER_TYPE_UW;
this->pixel_y.type = BRW_REGISTER_TYPE_UW;
- emit(fs_inst(BRW_OPCODE_ADD,
- this->pixel_x,
- fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
- fs_reg(brw_imm_v(0x10101010))));
- emit(fs_inst(BRW_OPCODE_ADD,
- this->pixel_y,
- fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
- fs_reg(brw_imm_v(0x11001100))));
+ emit(BRW_OPCODE_ADD,
+ this->pixel_x,
+ fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
+ fs_reg(brw_imm_v(0x10101010)));
+ emit(BRW_OPCODE_ADD,
+ this->pixel_y,
+ fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
+ fs_reg(brw_imm_v(0x11001100)));
this->current_annotation = "compute pixel deltas from v0";
if (brw->has_pln) {
@@ -1987,22 +1941,18 @@ fs_visitor::emit_interpolation_setup_gen4()
this->delta_x = fs_reg(this, glsl_type::float_type);
this->delta_y = fs_reg(this, glsl_type::float_type);
}
- emit(fs_inst(BRW_OPCODE_ADD,
- this->delta_x,
- this->pixel_x,
- fs_reg(negate(brw_vec1_grf(1, 0)))));
- emit(fs_inst(BRW_OPCODE_ADD,
- this->delta_y,
- this->pixel_y,
- fs_reg(negate(brw_vec1_grf(1, 1)))));
+ emit(BRW_OPCODE_ADD, this->delta_x,
+ this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0))));
+ emit(BRW_OPCODE_ADD, this->delta_y,
+ this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1))));
this->current_annotation = "compute pos.w and 1/pos.w";
/* Compute wpos.w. It's always in our setup, since it's needed to
* interpolate the other attributes.
*/
this->wpos_w = fs_reg(this, glsl_type::float_type);
- emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
- interp_reg(FRAG_ATTRIB_WPOS, 3)));
+ emit(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
+ interp_reg(FRAG_ATTRIB_WPOS, 3));
/* Compute the pixel 1/W value from wpos.w. */
this->pixel_w = fs_reg(this, glsl_type::float_type);
emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);
@@ -2021,14 +1971,14 @@ fs_visitor::emit_interpolation_setup_gen6()
fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type);
int_pixel_x.type = BRW_REGISTER_TYPE_UW;
int_pixel_y.type = BRW_REGISTER_TYPE_UW;
- emit(fs_inst(BRW_OPCODE_ADD,
- int_pixel_x,
- fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
- fs_reg(brw_imm_v(0x10101010))));
- emit(fs_inst(BRW_OPCODE_ADD,
- int_pixel_y,
- fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
- fs_reg(brw_imm_v(0x11001100))));
+ emit(BRW_OPCODE_ADD,
+ int_pixel_x,
+ fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
+ fs_reg(brw_imm_v(0x10101010)));
+ emit(BRW_OPCODE_ADD,
+ int_pixel_y,
+ fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
+ fs_reg(brw_imm_v(0x11001100)));
/* As of gen6, we can no longer mix float and int sources. We have
* to turn the integer pixel centers into floats for their actual
@@ -2036,13 +1986,13 @@ fs_visitor::emit_interpolation_setup_gen6()
*/
this->pixel_x = fs_reg(this, glsl_type::float_type);
this->pixel_y = fs_reg(this, glsl_type::float_type);
- emit(fs_inst(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x));
- emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y));
+ emit(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x);
+ emit(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y);
- this->current_annotation = "compute 1/pos.w";
- this->wpos_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
- this->pixel_w = fs_reg(this, glsl_type::float_type);
- emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);
+ this->current_annotation = "compute pos.w";
+ this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
+ this->wpos_w = fs_reg(this, glsl_type::float_type);
+ emit_math(FS_OPCODE_RCP, this->wpos_w, this->pixel_w);
this->delta_x = fs_reg(brw_vec8_grf(2, 0));
this->delta_y = fs_reg(brw_vec8_grf(3, 0));
@@ -2069,8 +2019,8 @@ fs_visitor::emit_fb_writes()
}
if (c->aa_dest_stencil_reg) {
- emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
- fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0))));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
+ fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)));
}
/* Reserve space for color. It'll be filled in per MRT below. */
@@ -2083,17 +2033,17 @@ fs_visitor::emit_fb_writes()
assert(this->frag_depth);
fs_reg depth = *(variable_storage(this->frag_depth));
- emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth);
} else {
/* Pass through the payload depth. */
- emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
- fs_reg(brw_vec8_grf(c->source_depth_reg, 0))));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
+ fs_reg(brw_vec8_grf(c->source_depth_reg, 0)));
}
}
if (c->dest_depth_reg) {
- emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
- fs_reg(brw_vec8_grf(c->dest_depth_reg, 0))));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
+ fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)));
}
fs_reg color = reg_undef;
@@ -2110,9 +2060,7 @@ fs_visitor::emit_fb_writes()
target);
if (this->frag_color || this->frag_data) {
for (int i = 0; i < 4; i++) {
- emit(fs_inst(BRW_OPCODE_MOV,
- fs_reg(MRF, color_mrf + i),
- color));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, color_mrf + i), color);
color.reg_offset++;
}
}
@@ -2120,8 +2068,7 @@ fs_visitor::emit_fb_writes()
if (this->frag_color)
color.reg_offset -= 4;
- fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
- reg_undef, reg_undef));
+ fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
inst->target = target;
inst->base_mrf = 0;
inst->mlen = nr;
@@ -2137,13 +2084,10 @@ fs_visitor::emit_fb_writes()
* renderbuffer.
*/
color.reg_offset += 3;
- emit(fs_inst(BRW_OPCODE_MOV,
- fs_reg(MRF, color_mrf + 3),
- color));
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, color_mrf + 3), color);
}
- fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
- reg_undef, reg_undef));
+ fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
inst->base_mrf = 0;
inst->mlen = nr;
inst->eot = true;
@@ -2868,8 +2812,7 @@ fs_visitor::calculate_live_intervals()
if (inst->src[i].file == GRF && inst->src[i].reg != 0) {
int reg = inst->src[i].reg;
- if (!loop_depth || (this->virtual_grf_sizes[reg] == 1 &&
- def[reg] >= bb_header_ip)) {
+ if (!loop_depth) {
use[reg] = ip;
} else {
def[reg] = MIN2(loop_start, def[reg]);
@@ -2885,8 +2828,7 @@ fs_visitor::calculate_live_intervals()
if (inst->dst.file == GRF && inst->dst.reg != 0) {
int reg = inst->dst.reg;
- if (!loop_depth || (this->virtual_grf_sizes[reg] == 1 &&
- !inst->predicated)) {
+ if (!loop_depth) {
def[reg] = MIN2(def[reg], ip);
} else {
def[reg] = MIN2(def[reg], loop_start);
@@ -2996,12 +2938,41 @@ fs_visitor::propagate_constants()
progress = true;
}
break;
+
case BRW_OPCODE_CMP:
+ if (i == 1) {
+ scan_inst->src[i] = inst->src[0];
+ progress = true;
+ } else if (i == 0 && scan_inst->src[1].file != IMM) {
+ uint32_t new_cmod;
+
+ new_cmod = brw_swap_cmod(scan_inst->conditional_mod);
+ if (new_cmod != ~0u) {
+ /* Fit this constant in by swapping the operands and
+ * flipping the test
+ */
+ scan_inst->src[0] = scan_inst->src[1];
+ scan_inst->src[1] = inst->src[0];
+ scan_inst->conditional_mod = new_cmod;
+ progress = true;
+ }
+ }
+ break;
+
case BRW_OPCODE_SEL:
if (i == 1) {
scan_inst->src[i] = inst->src[0];
progress = true;
+ } else if (i == 0 && scan_inst->src[1].file != IMM) {
+ /* Fit this constant in by swapping the operands and
+ * flipping the predicate
+ */
+ scan_inst->src[0] = scan_inst->src[1];
+ scan_inst->src[1] = inst->src[0];
+ scan_inst->predicate_inverse = !scan_inst->predicate_inverse;
+ progress = true;
}
+ break;
}
}
@@ -3487,6 +3458,7 @@ fs_visitor::generate_code()
brw_set_conditionalmod(p, inst->conditional_mod);
brw_set_predicate_control(p, inst->predicated);
+ brw_set_predicate_inverse(p, inst->predicate_inverse);
brw_set_saturate(p, inst->saturate);
switch (inst->opcode) {
@@ -3677,7 +3649,7 @@ fs_visitor::generate_code()
} else {
_mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
}
- this->fail = true;
+ fail("unsupported opcode in FS\n");
}
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
@@ -3808,18 +3780,18 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
v.assign_regs_trivial();
else {
while (!v.assign_regs()) {
- if (v.fail)
+ if (v.failed)
break;
}
}
}
- if (!v.fail)
+ if (!v.failed)
v.generate_code();
- assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */
+ assert(!v.failed); /* FINISHME: Cleanly fail, tested at link time, etc. */
- if (v.fail)
+ if (v.failed)
return GL_FALSE;
c->prog_data.total_grf = v.grf_used;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index dc030ae..f792906 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -331,6 +331,7 @@ public:
fs_reg src[3];
bool saturate;
bool predicated;
+ bool predicate_inverse;
int conditional_mod; /**< BRW_CONDITIONAL_* */
int mlen; /**< SEND message length */
@@ -364,7 +365,7 @@ public:
this->ctx = &intel->ctx;
this->mem_ctx = ralloc_context(NULL);
this->shader = shader;
- this->fail = false;
+ this->failed = false;
this->variable_ht = hash_table_ctor(0,
hash_table_pointer_hash,
hash_table_pointer_compare);
@@ -432,6 +433,32 @@ public:
void visit(ir_function_signature *ir);
fs_inst *emit(fs_inst inst);
+
+ fs_inst *emit(int opcode)
+ {
+ return emit(fs_inst(opcode));
+ }
+
+ fs_inst *emit(int opcode, fs_reg dst)
+ {
+ return emit(fs_inst(opcode, dst));
+ }
+
+ fs_inst *emit(int opcode, fs_reg dst, fs_reg src0)
+ {
+ return emit(fs_inst(opcode, dst, src0));
+ }
+
+ fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
+ {
+ return emit(fs_inst(opcode, dst, src0, src1));
+ }
+
+ fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
+ {
+ return emit(fs_inst(opcode, dst, src0, src1, src2));
+ }
+
void setup_paramvalues_refs();
void assign_curb_setup();
void calculate_urb_setup();
@@ -450,6 +477,7 @@ public:
bool remove_duplicate_mrf_writes();
bool virtual_grf_interferes(int a, int b);
void schedule_instructions();
+ void fail(const char *msg, ...);
void generate_code();
void generate_fb_write(fs_inst *inst);
@@ -523,7 +551,7 @@ public:
ir_instruction *base_ir;
/** @} */
- bool fail;
+ bool failed;
/* Result of last visit() method. */
fs_reg result;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index f027742..67f29ce 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -119,8 +119,7 @@ fs_visitor::assign_regs()
}
if (i == class_count) {
if (this->virtual_grf_sizes[r] >= base_reg_count) {
- fprintf(stderr, "Object too large to register allocate.\n");
- this->fail = true;
+ fail("Object too large to register allocate.\n");
}
class_sizes[class_count++] = this->virtual_grf_sizes[r];
@@ -226,8 +225,9 @@ fs_visitor::assign_regs()
* loop back into here to try again.
*/
int reg = choose_spill_reg(g);
- if (reg == -1 || intel->gen >= 6) {
- this->fail = true;
+
+ if (reg == -1) {
+ fail("no register to spill\n");
} else {
spill_reg(reg);
}
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 70c451d..14ee676 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -83,45 +83,23 @@ static void compile_gs_prog( struct brw_context *brw,
/* Note that primitives which don't require a GS program have
* already been weeded out by this stage:
*/
+
+ /* Gen6: VF has already converted into polygon, and LINELOOP is
+ * converted to LINESTRIP at the beginning of the 3D pipeline.
+ */
+ if (intel->gen == 6)
+ return;
+
switch (key->primitive) {
case GL_QUADS:
- /* Gen6: VF has already converted into polygon. */
- if (intel->gen == 6)
- return;
brw_gs_quads( &c, key );
break;
case GL_QUAD_STRIP:
- if (intel->gen == 6)
- return;
brw_gs_quad_strip( &c, key );
break;
case GL_LINE_LOOP:
- /* Gen6: LINELOOP is converted to LINESTRIP at the beginning of the 3D pipeline */
- if (intel->gen == 6)
- return;
brw_gs_lines( &c );
break;
- case GL_LINES:
- if (key->hint_gs_always)
- brw_gs_lines( &c );
- else {
- return;
- }
- break;
- case GL_TRIANGLES:
- if (key->hint_gs_always)
- brw_gs_tris( &c );
- else {
- return;
- }
- break;
- case GL_POINTS:
- if (key->hint_gs_always)
- brw_gs_points( &c );
- else {
- return;
- }
- break;
default:
return;
}
@@ -170,7 +148,6 @@ static void populate_key( struct brw_context *brw,
{
struct gl_context *ctx = &brw->intel.ctx;
struct intel_context *intel = &brw->intel;
- int prim_gs_always;
memset(key, 0, sizeof(*key));
@@ -180,8 +157,6 @@ static void populate_key( struct brw_context *brw,
/* BRW_NEW_PRIMITIVE */
key->primitive = gs_prim[brw->primitive];
- key->hint_gs_always = 0; /* debug code? */
-
/* _NEW_LIGHT */
key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
if (key->primitive == GL_QUADS && ctx->Light.ShadeModel != GL_FLAT) {
@@ -191,14 +166,11 @@ static void populate_key( struct brw_context *brw,
key->pv_first = GL_TRUE;
}
- if (intel->gen == 6)
- prim_gs_always = 0;
- else
- prim_gs_always = brw->primitive == GL_QUADS ||
- brw->primitive == GL_QUAD_STRIP ||
- brw->primitive == GL_LINE_LOOP;
-
- key->need_gs_prog = (key->hint_gs_always || prim_gs_always);
+ key->need_gs_prog = (intel->gen == 6)
+ ? 0
+ : (brw->primitive == GL_QUADS ||
+ brw->primitive == GL_QUAD_STRIP ||
+ brw->primitive == GL_LINE_LOOP);
}
/* Calculate interpolants for triangle and line rasterization.
diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h
index 7e35310..c33528e 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.h
+++ b/src/mesa/drivers/dri/i965/brw_gs.h
@@ -42,10 +42,9 @@
struct brw_gs_prog_key {
GLbitfield64 attrs;
GLuint primitive:4;
- GLuint hint_gs_always:1;
GLuint pv_first:1;
GLuint need_gs_prog:1;
- GLuint pad:25;
+ GLuint pad:26;
};
struct brw_gs_compile {
@@ -70,8 +69,6 @@ struct brw_gs_compile {
void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key );
void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key );
-void brw_gs_tris( struct brw_gs_compile *c );
void brw_gs_lines( struct brw_gs_compile *c );
-void brw_gs_points( struct brw_gs_compile *c );
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c
index e1f751f..3bb526b 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c
@@ -193,19 +193,6 @@ void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
}
}
-void brw_gs_tris( struct brw_gs_compile *c )
-{
- struct intel_context *intel = &c->func.brw->intel;
-
- brw_gs_alloc_regs(c, 3);
-
- if (intel->needs_ff_sync)
- brw_gs_ff_sync(c, 1);
- brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START));
- brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2));
- brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END));
-}
-
void brw_gs_lines( struct brw_gs_compile *c )
{
struct intel_context *intel = &c->func.brw->intel;
@@ -217,22 +204,3 @@ void brw_gs_lines( struct brw_gs_compile *c )
brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START));
brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END));
}
-
-void brw_gs_points( struct brw_gs_compile *c )
-{
- struct intel_context *intel = &c->func.brw->intel;
-
- brw_gs_alloc_regs(c, 1);
-
- if (intel->needs_ff_sync)
- brw_gs_ff_sync(c, 1);
- brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END));
-}
-
-
-
-
-
-
-
-
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index c768be2..19eea07 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -149,7 +149,8 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
else
OUT_BATCH(0);
OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
- OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ brw->sf.state_offset);
OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
brw->cc.state_offset);
@@ -247,8 +248,7 @@ static void emit_depthbuffer(struct brw_context *brw)
}
assert(region->tiling != I915_TILING_X);
- if (intel->gen >= 6)
- assert(region->tiling != I915_TILING_NONE);
+ assert(intel->gen < 6 || region->tiling == I915_TILING_Y);
BEGIN_BATCH(len);
OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
@@ -283,6 +283,9 @@ static void emit_depthbuffer(struct brw_context *brw)
}
}
+/**
+ * \see brw_context.state.depth_region
+ */
const struct brw_tracked_state brw_depthbuffer = {
.dirty = {
.mesa = 0,
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index ee68095..6674f16 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -134,11 +134,6 @@ static GLboolean brwProgramStringNotify( struct gl_context *ctx,
brw_fragment_program_const(brw->fragment_program);
struct gl_shader_program *shader_program;
- if (fprog->FogOption) {
- _mesa_append_fog_code(ctx, fprog);
- fprog->FogOption = GL_NONE;
- }
-
if (newFP == curFP)
brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
newFP->id = brw->program_id++;
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c
index bd3a21e..66d91a0 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -38,14 +38,16 @@
static void upload_sf_vp(struct brw_context *brw)
{
+ struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &brw->intel.ctx;
const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
- struct brw_sf_viewport sfv;
+ struct brw_sf_viewport *sfv;
GLfloat y_scale, y_bias;
const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
const GLfloat *v = ctx->Viewport._WindowMap.m;
- memset(&sfv, 0, sizeof(sfv));
+ sfv = brw_state_batch(brw, sizeof(*sfv), 32, &brw->sf.vp_offset);
+ memset(sfv, 0, sizeof(*sfv));
if (render_to_fbo) {
y_scale = 1.0;
@@ -58,12 +60,12 @@ static void upload_sf_vp(struct brw_context *brw)
/* _NEW_VIEWPORT */
- sfv.viewport.m00 = v[MAT_SX];
- sfv.viewport.m11 = v[MAT_SY] * y_scale;
- sfv.viewport.m22 = v[MAT_SZ] * depth_scale;
- sfv.viewport.m30 = v[MAT_TX];
- sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias;
- sfv.viewport.m32 = v[MAT_TZ] * depth_scale;
+ sfv->viewport.m00 = v[MAT_SX];
+ sfv->viewport.m11 = v[MAT_SY] * y_scale;
+ sfv->viewport.m22 = v[MAT_SZ] * depth_scale;
+ sfv->viewport.m30 = v[MAT_TX];
+ sfv->viewport.m31 = v[MAT_TY] * y_scale + y_bias;
+ sfv->viewport.m32 = v[MAT_TZ] * depth_scale;
/* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT
* for DrawBuffer->_[XY]{min,max}
@@ -85,27 +87,31 @@ static void upload_sf_vp(struct brw_context *brw)
* anything. Instead, just provide a min > max scissor inside
* the bounds, which produces the expected no rendering.
*/
- sfv.scissor.xmin = 1;
- sfv.scissor.xmax = 0;
- sfv.scissor.ymin = 1;
- sfv.scissor.ymax = 0;
+ sfv->scissor.xmin = 1;
+ sfv->scissor.xmax = 0;
+ sfv->scissor.ymin = 1;
+ sfv->scissor.ymax = 0;
} else if (render_to_fbo) {
/* texmemory: Y=0=bottom */
- sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
- sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
- sfv.scissor.ymin = ctx->DrawBuffer->_Ymin;
- sfv.scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
+ sfv->scissor.xmin = ctx->DrawBuffer->_Xmin;
+ sfv->scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
+ sfv->scissor.ymin = ctx->DrawBuffer->_Ymin;
+ sfv->scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
}
else {
/* memory: Y=0=top */
- sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
- sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
- sfv.scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax;
- sfv.scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
+ sfv->scissor.xmin = ctx->DrawBuffer->_Xmin;
+ sfv->scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
+ sfv->scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax;
+ sfv->scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
}
+ /* Keep a pointer to it for brw_state_dump.c */
drm_intel_bo_unreference(brw->sf.vp_bo);
- brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, &sfv, sizeof(sfv));
+ drm_intel_bo_reference(intel->batch.bo);
+ brw->sf.vp_bo = intel->batch.bo;
+
+ brw->state.dirty.cache |= CACHE_NEW_SF_VP;
}
const struct brw_tracked_state brw_sf_vp = {
@@ -113,92 +119,44 @@ const struct brw_tracked_state brw_sf_vp = {
.mesa = (_NEW_VIEWPORT |
_NEW_SCISSOR |
_NEW_BUFFERS),
- .brw = 0,
+ .brw = BRW_NEW_BATCH,
.cache = 0
},
.prepare = upload_sf_vp
};
-struct brw_sf_unit_key {
- unsigned int total_grf;
- unsigned int urb_entry_read_length;
-
- unsigned int nr_urb_entries, urb_size, sfsize;
-
- GLenum front_face, cull_face;
- unsigned pv_first:1;
- unsigned scissor:1;
- unsigned line_smooth:1;
- unsigned point_sprite:1;
- unsigned use_vs_point_size:1;
- unsigned render_to_fbo:1;
- float line_width;
- float point_size;
-};
-
-static void
-sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
-{
- struct gl_context *ctx = &brw->intel.ctx;
- memset(key, 0, sizeof(*key));
-
- /* CACHE_NEW_SF_PROG */
- key->total_grf = brw->sf.prog_data->total_grf;
- key->urb_entry_read_length = brw->sf.prog_data->urb_read_length;
-
- /* BRW_NEW_URB_FENCE */
- key->nr_urb_entries = brw->urb.nr_sf_entries;
- key->urb_size = brw->urb.vsize;
- key->sfsize = brw->urb.sfsize;
-
- key->scissor = ctx->Scissor.Enabled;
- key->front_face = ctx->Polygon.FrontFace;
-
- if (ctx->Polygon.CullFlag)
- key->cull_face = ctx->Polygon.CullFaceMode;
- else
- key->cull_face = GL_NONE;
-
- key->line_width = ctx->Line.Width;
- key->line_smooth = ctx->Line.SmoothFlag;
-
- key->point_sprite = ctx->Point.PointSprite;
- key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
- key->use_vs_point_size = (ctx->VertexProgram.PointSizeEnabled ||
- ctx->Point._Attenuated);
-
- /* _NEW_LIGHT */
- key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
-
- key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
-}
-
-static drm_intel_bo *
-sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
- drm_intel_bo **reloc_bufs)
+static void upload_sf_unit( struct brw_context *brw )
{
struct intel_context *intel = &brw->intel;
- struct brw_sf_unit_state sf;
- drm_intel_bo *bo;
+ struct gl_context *ctx = &intel->ctx;
+ struct brw_sf_unit_state *sf;
+ drm_intel_bo *bo = intel->batch.bo;
int chipset_max_threads;
- memset(&sf, 0, sizeof(sf));
+ bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+
+ sf = brw_state_batch(brw, sizeof(*sf), 64, &brw->sf.state_offset);
- sf.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
- sf.thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */
+ memset(sf, 0, sizeof(*sf));
+
+ /* CACHE_NEW_SF_PROG */
+ sf->thread0.grf_reg_count = ALIGN(brw->sf.prog_data->total_grf, 16) / 16 - 1;
+ sf->thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */
- sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ sf->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
- sf.thread3.dispatch_grf_start_reg = 3;
+ sf->thread3.dispatch_grf_start_reg = 3;
if (intel->gen == 5)
- sf.thread3.urb_entry_read_offset = 3;
+ sf->thread3.urb_entry_read_offset = 3;
else
- sf.thread3.urb_entry_read_offset = 1;
+ sf->thread3.urb_entry_read_offset = 1;
- sf.thread3.urb_entry_read_length = key->urb_entry_read_length;
+ /* CACHE_NEW_SF_PROG */
+ sf->thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length;
- sf.thread4.nr_urb_entries = key->nr_urb_entries;
- sf.thread4.urb_entry_allocation_size = key->sfsize - 1;
+ /* BRW_NEW_URB_FENCE */
+ sf->thread4.nr_urb_entries = brw->urb.nr_sf_entries;
+ sf->thread4.urb_entry_allocation_size = brw->urb.sfsize - 1;
/* Each SF thread produces 1 PUE, and there can be up to 24 (Pre-Ironlake) or
* 48 (Ironlake) threads.
@@ -208,46 +166,51 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
else
chipset_max_threads = 24;
- sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1;
+ /* BRW_NEW_URB_FENCE */
+ sf->thread4.max_threads = MIN2(chipset_max_threads,
+ brw->urb.nr_sf_entries) - 1;
if (unlikely(INTEL_DEBUG & DEBUG_SINGLE_THREAD))
- sf.thread4.max_threads = 0;
+ sf->thread4.max_threads = 0;
if (unlikely(INTEL_DEBUG & DEBUG_STATS))
- sf.thread4.stats_enable = 1;
+ sf->thread4.stats_enable = 1;
/* CACHE_NEW_SF_VP */
- sf.sf5.sf_viewport_state_offset = brw->sf.vp_bo->offset >> 5; /* reloc */
+ sf->sf5.sf_viewport_state_offset = (brw->sf.vp_bo->offset +
+ brw->sf.vp_offset) >> 5; /* reloc */
- sf.sf5.viewport_transform = 1;
+ sf->sf5.viewport_transform = 1;
/* _NEW_SCISSOR */
- if (key->scissor)
- sf.sf6.scissor = 1;
+ if (ctx->Scissor.Enabled)
+ sf->sf6.scissor = 1;
/* _NEW_POLYGON */
- if (key->front_face == GL_CCW)
- sf.sf5.front_winding = BRW_FRONTWINDING_CCW;
+ if (ctx->Polygon.FrontFace == GL_CCW)
+ sf->sf5.front_winding = BRW_FRONTWINDING_CCW;
else
- sf.sf5.front_winding = BRW_FRONTWINDING_CW;
+ sf->sf5.front_winding = BRW_FRONTWINDING_CW;
- /* The viewport is inverted for rendering to a FBO, and that inverts
+ /* _NEW_BUFFERS
+ * The viewport is inverted for rendering to a FBO, and that inverts
* polygon front/back orientation.
*/
- sf.sf5.front_winding ^= key->render_to_fbo;
+ sf->sf5.front_winding ^= render_to_fbo;
- switch (key->cull_face) {
+ /* _NEW_POLYGON */
+ switch (ctx->Polygon.CullFlag ? ctx->Polygon.CullFaceMode : GL_NONE) {
case GL_FRONT:
- sf.sf6.cull_mode = BRW_CULLMODE_FRONT;
+ sf->sf6.cull_mode = BRW_CULLMODE_FRONT;
break;
case GL_BACK:
- sf.sf6.cull_mode = BRW_CULLMODE_BACK;
+ sf->sf6.cull_mode = BRW_CULLMODE_BACK;
break;
case GL_FRONT_AND_BACK:
- sf.sf6.cull_mode = BRW_CULLMODE_BOTH;
+ sf->sf6.cull_mode = BRW_CULLMODE_BOTH;
break;
case GL_NONE:
- sf.sf6.cull_mode = BRW_CULLMODE_NONE;
+ sf->sf6.cull_mode = BRW_CULLMODE_NONE;
break;
default:
assert(0);
@@ -256,19 +219,18 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
/* _NEW_LINE */
/* XXX use ctx->Const.Min/MaxLineWidth here */
- sf.sf6.line_width = CLAMP(key->line_width, 1.0, 5.0) * (1<<1);
+ sf->sf6.line_width = CLAMP(ctx->Line.Width, 1.0, 5.0) * (1<<1);
- sf.sf6.line_endcap_aa_region_width = 1;
- if (key->line_smooth)
- sf.sf6.aa_enable = 1;
- else if (sf.sf6.line_width <= 0x2)
- sf.sf6.line_width = 0;
+ sf->sf6.line_endcap_aa_region_width = 1;
+ if (ctx->Line.SmoothFlag)
+ sf->sf6.aa_enable = 1;
+ else if (sf->sf6.line_width <= 0x2)
+ sf->sf6.line_width = 0;
/* _NEW_BUFFERS */
- key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
- if (!key->render_to_fbo) {
+ if (!render_to_fbo) {
/* Rendering to an OpenGL window */
- sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT;
+ sf->sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT;
}
else {
/* If rendering to an FBO, the pixel coordinate system is
@@ -290,74 +252,56 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
* incorrectly, which is no worse than occurs without
* the value, so we're using it here.
*/
- sf.sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT;
+ sf->sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT;
}
/* XXX clamp max depends on AA vs. non-AA */
/* _NEW_POINT */
- sf.sf7.sprite_point = key->point_sprite;
- sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3);
- sf.sf7.use_point_size_state = !key->use_vs_point_size;
- sf.sf7.aa_line_distance_mode = 0;
+ sf->sf7.sprite_point = ctx->Point.PointSprite;
+ sf->sf7.point_size = CLAMP(rint(CLAMP(ctx->Point.Size,
+ ctx->Point.MinSize,
+ ctx->Point.MaxSize)), 1, 255) * (1<<3);
+ sf->sf7.use_point_size_state = !(ctx->VertexProgram.PointSizeEnabled ||
+ ctx->Point._Attenuated);
+ sf->sf7.aa_line_distance_mode = 0;
/* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
+ * _NEW_LIGHT
*/
- if (!key->pv_first) {
- sf.sf7.trifan_pv = 2;
- sf.sf7.linestrip_pv = 1;
- sf.sf7.tristrip_pv = 2;
+ if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
+ sf->sf7.trifan_pv = 2;
+ sf->sf7.linestrip_pv = 1;
+ sf->sf7.tristrip_pv = 2;
} else {
- sf.sf7.trifan_pv = 1;
- sf.sf7.linestrip_pv = 0;
- sf.sf7.tristrip_pv = 0;
+ sf->sf7.trifan_pv = 1;
+ sf->sf7.linestrip_pv = 0;
+ sf->sf7.tristrip_pv = 0;
}
- sf.sf7.line_last_pixel_enable = 0;
+ sf->sf7.line_last_pixel_enable = 0;
/* Set bias for OpenGL rasterization rules:
*/
- sf.sf6.dest_org_vbias = 0x8;
- sf.sf6.dest_org_hbias = 0x8;
-
- bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT,
- key, sizeof(*key),
- reloc_bufs, 2,
- &sf, sizeof(sf));
+ sf->sf6.dest_org_vbias = 0x8;
+ sf->sf6.dest_org_hbias = 0x8;
/* STATE_PREFETCH command description describes this state as being
* something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain.
*/
/* Emit SF program relocation */
- drm_intel_bo_emit_reloc(bo, offsetof(struct brw_sf_unit_state, thread0),
- brw->sf.prog_bo, sf.thread0.grf_reg_count << 1,
+ drm_intel_bo_emit_reloc(bo, (brw->sf.state_offset +
+ offsetof(struct brw_sf_unit_state, thread0)),
+ brw->sf.prog_bo, sf->thread0.grf_reg_count << 1,
I915_GEM_DOMAIN_INSTRUCTION, 0);
/* Emit SF viewport relocation */
- drm_intel_bo_emit_reloc(bo, offsetof(struct brw_sf_unit_state, sf5),
- brw->sf.vp_bo, (sf.sf5.front_winding |
- (sf.sf5.viewport_transform << 1)),
+ drm_intel_bo_emit_reloc(bo, (brw->sf.state_offset +
+ offsetof(struct brw_sf_unit_state, sf5)),
+ intel->batch.bo, (brw->sf.vp_offset |
+ sf->sf5.front_winding |
+ (sf->sf5.viewport_transform << 1)),
I915_GEM_DOMAIN_INSTRUCTION, 0);
- return bo;
-}
-
-static void upload_sf_unit( struct brw_context *brw )
-{
- struct brw_sf_unit_key key;
- drm_intel_bo *reloc_bufs[2];
-
- sf_unit_populate_key(brw, &key);
-
- reloc_bufs[0] = brw->sf.prog_bo;
- reloc_bufs[1] = brw->sf.vp_bo;
-
- drm_intel_bo_unreference(brw->sf.state_bo);
- brw->sf.state_bo = brw_search_cache(&brw->cache, BRW_SF_UNIT,
- &key, sizeof(key),
- reloc_bufs, 2,
- NULL);
- if (brw->sf.state_bo == NULL) {
- brw->sf.state_bo = sf_unit_create_from_key(brw, &key, reloc_bufs);
- }
+ brw->state.dirty.cache |= CACHE_NEW_SF_UNIT;
}
const struct brw_tracked_state brw_sf_unit = {
@@ -368,7 +312,8 @@ const struct brw_tracked_state brw_sf_unit = {
_NEW_POINT |
_NEW_SCISSOR |
_NEW_BUFFERS),
- .brw = BRW_NEW_URB_FENCE,
+ .brw = (BRW_NEW_BATCH |
+ BRW_NEW_URB_FENCE),
.cache = (CACHE_NEW_SF_VP |
CACHE_NEW_SF_PROG)
},
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c
index fdce79d..b393259 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -195,8 +195,8 @@ static void dump_sf_viewport_state(struct brw_context *brw)
drm_intel_bo_map(brw->sf.vp_bo, GL_FALSE);
- vp = brw->sf.vp_bo->virtual;
- vp_off = brw->sf.vp_bo->offset;
+ vp = brw->sf.vp_bo->virtual + brw->sf.vp_offset;
+ vp_off = brw->sf.vp_bo->offset + brw->sf.vp_offset;
state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00);
state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11);
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
index 6687a89..8d4797fb 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -80,25 +80,6 @@ struct brw_3d_control
GLuint dword3;
};
-
-struct brw_3d_primitive
-{
- struct
- {
- GLuint length:8;
- GLuint pad:2;
- GLuint topology:5;
- GLuint indexed:1;
- GLuint opcode:16;
- } header;
-
- GLuint verts_per_instance;
- GLuint start_vert_location;
- GLuint instance_count;
- GLuint start_instance_location;
- GLuint base_vert_location;
-};
-
/* These seem to be passed around as function args, so it works out
* better to keep them as #defines:
*/
@@ -1247,31 +1228,6 @@ struct brw_surface_state
};
-
-struct brw_vertex_buffer_state
-{
- struct {
- GLuint pitch:11;
- GLuint pad:15;
- GLuint access_type:1;
- GLuint vb_index:5;
- } vb0;
-
- GLuint start_addr;
- GLuint max_index;
-#if 1
- GLuint instance_data_step_rate; /* not included for sequential/random vertices? */
-#endif
-};
-
-#define BRW_VBP_MAX 17
-
-struct brw_vb_array_state {
- struct header header;
- struct brw_vertex_buffer_state vb[BRW_VBP_MAX];
-};
-
-
struct brw_vertex_element_state
{
struct
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 6ae75d2..63ae131 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -132,6 +132,9 @@ static void brw_upload_vs_prog(struct brw_context *brw)
ctx->Polygon.BackMode != GL_FILL);
key.two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
+ /* _NEW_LIGHT | _NEW_BUFFERS */
+ key.clamp_vertex_color = ctx->Light._ClampVertexColor;
+
/* _NEW_POINT */
if (ctx->Point.PointSprite) {
for (i = 0; i < 8; i++) {
@@ -158,7 +161,8 @@ static void brw_upload_vs_prog(struct brw_context *brw)
*/
const struct brw_tracked_state brw_vs_prog = {
.dirty = {
- .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT | _NEW_LIGHT,
+ .mesa = (_NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT | _NEW_LIGHT |
+ _NEW_BUFFERS),
.brw = BRW_NEW_VERTEX_PROGRAM,
.cache = 0
},
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 0b88cc1..7ca84a5 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -45,6 +45,7 @@ struct brw_vs_prog_key {
GLuint copy_edgeflag:1;
GLuint point_coord_replace:8;
GLuint two_side_color: 1;
+ GLuint clamp_vertex_color:1;
};
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index acacf37..dd4e1e6 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -437,8 +437,14 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
if (c->key.nr_userclip)
header_regs += 2;
+ /* Each attribute is 16 bytes (1 vec4), so dividing by 8 gives us the
+ * number of 128-byte (1024-bit) units.
+ */
c->prog_data.urb_entry_size = (attributes_in_vue + header_regs + 7) / 8;
} else if (intel->gen == 5)
+ /* Each attribute is 16 bytes (1 vec4), so dividing by 4 gives us the
+ * number of 64-byte (512-bit) units.
+ */
c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
else
c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
@@ -2215,7 +2221,8 @@ void brw_vs_emit(struct brw_vs_compile *c )
* instructions. Instead, we directly modify the header
* of the last (already stored) instruction.
*/
- if (inst->DstReg.File == PROGRAM_OUTPUT) {
+ if (inst->DstReg.File == PROGRAM_OUTPUT &&
+ c->key.clamp_vertex_color) {
if ((inst->DstReg.Index == VERT_RESULT_COL0)
|| (inst->DstReg.Index == VERT_RESULT_COL1)
|| (inst->DstReg.Index == VERT_RESULT_BFC0)
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index 152ee14..ce8712a 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -115,13 +115,11 @@ static void brw_set_draw_region( struct intel_context *intel,
{
struct brw_context *brw = brw_context(&intel->ctx);
- /* release old color/depth regions */
- if (brw->state.depth_region != depth_region)
+ if (brw->state.depth_region != depth_region) {
brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER;
- intel_region_release(&brw->state.depth_region);
-
- /* reference new color/depth regions */
- intel_region_reference(&brw->state.depth_region, depth_region);
+ intel_region_release(&brw->state.depth_region);
+ intel_region_reference(&brw->state.depth_region, depth_region);
+ }
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index ca51d15..65af227 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -185,6 +185,7 @@ static void do_wm_prog( struct brw_context *brw,
struct brw_fragment_program *fp,
struct brw_wm_prog_key *key)
{
+ struct intel_context *intel = &brw->intel;
struct brw_wm_compile *c;
const GLuint *program;
GLuint program_size;
@@ -238,12 +239,26 @@ static void do_wm_prog( struct brw_context *brw,
/* Scratch space is used for register spilling */
if (c->last_scratch) {
+ uint32_t total_scratch;
+
/* Per-thread scratch space is power-of-two sized. */
for (c->prog_data.total_scratch = 1024;
c->prog_data.total_scratch <= c->last_scratch;
c->prog_data.total_scratch *= 2) {
/* empty */
}
+ total_scratch = c->prog_data.total_scratch * brw->wm_max_threads;
+
+ if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) {
+ drm_intel_bo_unreference(brw->wm.scratch_bo);
+ brw->wm.scratch_bo = NULL;
+ }
+ if (brw->wm.scratch_bo == NULL) {
+ brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
+ "wm scratch",
+ total_scratch,
+ 4096);
+ }
}
else {
c->prog_data.total_scratch = 0;
@@ -348,6 +363,9 @@ static void brw_wm_populate_key( struct brw_context *brw,
/* _NEW_HINT */
key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST);
+ /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
+ key->clamp_fragment_color = ctx->Color._ClampFragmentColor;
+
/* _NEW_TEXTURE */
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
const struct gl_texture_unit *unit = &ctx->Texture.Unit[i];
@@ -370,14 +388,14 @@ static void brw_wm_populate_key( struct brw_context *brw,
* well and our shadow compares always return the result in
* all 4 channels.
*/
- if (t->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) {
- if (t->DepthMode == GL_ALPHA) {
+ if (t->Sampler.CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) {
+ if (t->Sampler.DepthMode == GL_ALPHA) {
swizzles[0] = SWIZZLE_ZERO;
swizzles[1] = SWIZZLE_ZERO;
swizzles[2] = SWIZZLE_ZERO;
- } else if (t->DepthMode == GL_LUMINANCE) {
+ } else if (t->Sampler.DepthMode == GL_LUMINANCE) {
swizzles[3] = SWIZZLE_ONE;
- } else if (t->DepthMode == GL_RED) {
+ } else if (t->Sampler.DepthMode == GL_RED) {
/* See table 3.23 of the GL 3.0 spec. */
swizzles[1] = SWIZZLE_ZERO;
swizzles[2] = SWIZZLE_ZERO;
@@ -471,6 +489,7 @@ const struct brw_tracked_state brw_wm_prog = {
_NEW_POLYGON |
_NEW_LINE |
_NEW_LIGHT |
+ _NEW_FRAG_CLAMP |
_NEW_BUFFERS |
_NEW_TEXTURE),
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index c40d7bf..40659f2 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -65,6 +65,7 @@ struct brw_wm_prog_key {
GLuint nr_color_regions:5;
GLuint render_to_fbo:1;
GLuint alpha_test:1;
+ GLuint clamp_fragment_color:1;
GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */
GLuint shadowtex_mask:16;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index ecfd21d..cdc1f36 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -1408,6 +1408,9 @@ void emit_fb_write(struct brw_wm_compile *c,
*/
brw_push_insn_state(p);
+ if (c->key.clamp_fragment_color)
+ brw_set_saturate(p, 1);
+
for (channel = 0; channel < 4; channel++) {
if (intel->gen >= 6) {
/* gen6 SIMD16 single source DP write looks like:
@@ -1459,6 +1462,9 @@ void emit_fb_write(struct brw_wm_compile *c,
}
}
}
+
+ brw_set_saturate(p, 0);
+
/* skip over the regs populated above:
*/
if (c->dispatch_width == 16)
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index 30672b4..cfc30d8 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -288,26 +288,26 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP)
? ctx->Texture.CubeMapSeamless : GL_FALSE;
- entry->wrap_r = texObj->WrapR;
- entry->wrap_s = texObj->WrapS;
- entry->wrap_t = texObj->WrapT;
-
- entry->maxlod = texObj->MaxLod;
- entry->minlod = texObj->MinLod;
- entry->lod_bias = texUnit->LodBias + texObj->LodBias;
- entry->max_aniso = texObj->MaxAnisotropy;
- entry->minfilter = texObj->MinFilter;
- entry->magfilter = texObj->MagFilter;
- entry->comparemode = texObj->CompareMode;
- entry->comparefunc = texObj->CompareFunc;
+ entry->wrap_r = texObj->Sampler.WrapR;
+ entry->wrap_s = texObj->Sampler.WrapS;
+ entry->wrap_t = texObj->Sampler.WrapT;
+
+ entry->maxlod = texObj->Sampler.MaxLod;
+ entry->minlod = texObj->Sampler.MinLod;
+ entry->lod_bias = texUnit->LodBias + texObj->Sampler.LodBias;
+ entry->max_aniso = texObj->Sampler.MaxAnisotropy;
+ entry->minfilter = texObj->Sampler.MinFilter;
+ entry->magfilter = texObj->Sampler.MagFilter;
+ entry->comparemode = texObj->Sampler.CompareMode;
+ entry->comparefunc = texObj->Sampler.CompareFunc;
drm_intel_bo_unreference(brw->wm.sdc_bo[unit]);
if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
float bordercolor[4] = {
- texObj->BorderColor.f[0],
- texObj->BorderColor.f[0],
- texObj->BorderColor.f[0],
- texObj->BorderColor.f[0]
+ texObj->Sampler.BorderColor.f[0],
+ texObj->Sampler.BorderColor.f[0],
+ texObj->Sampler.BorderColor.f[0],
+ texObj->Sampler.BorderColor.f[0]
};
/* GL specs that border color for depth textures is taken from the
* R channel, while the hardware uses A. Spam R into all the
@@ -316,7 +316,7 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
brw->wm.sdc_bo[unit] = upload_default_color(brw, bordercolor);
} else {
brw->wm.sdc_bo[unit] = upload_default_color(brw,
- texObj->BorderColor.f);
+ texObj->Sampler.BorderColor.f);
}
key->sampler_count = unit + 1;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 5b5afc4..be4b260 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -278,30 +278,10 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
static void upload_wm_unit( struct brw_context *brw )
{
- struct intel_context *intel = &brw->intel;
struct brw_wm_unit_key key;
drm_intel_bo *reloc_bufs[3];
wm_unit_populate_key(brw, &key);
- /* Allocate the necessary scratch space if we haven't already. Don't
- * bother reducing the allocation later, since we use scratch so
- * rarely.
- */
- if (key.total_scratch) {
- GLuint total = key.total_scratch * brw->wm_max_threads;
-
- if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) {
- drm_intel_bo_unreference(brw->wm.scratch_bo);
- brw->wm.scratch_bo = NULL;
- }
- if (brw->wm.scratch_bo == NULL) {
- brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
- "wm scratch",
- total,
- 4096);
- }
- }
-
reloc_bufs[0] = brw->wm.prog_bo;
reloc_bufs[1] = brw->wm.scratch_bo;
reloc_bufs[2] = brw->wm.sampler_bo;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 1010d9f..e3396a3 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -100,18 +100,37 @@ static uint32_t brw_format_for_mesa_format[MESA_FORMAT_COUNT] =
[MESA_FORMAT_SLA8] = BRW_SURFACEFORMAT_L8A8_UNORM_SRGB,
[MESA_FORMAT_SL8] = BRW_SURFACEFORMAT_L8_UNORM_SRGB,
[MESA_FORMAT_DUDV8] = BRW_SURFACEFORMAT_R8G8_SNORM,
+ [MESA_FORMAT_SIGNED_R8] = BRW_SURFACEFORMAT_R8_SNORM,
+ [MESA_FORMAT_SIGNED_RG88_REV] = BRW_SURFACEFORMAT_R8G8_SNORM,
[MESA_FORMAT_SIGNED_RGBA8888_REV] = BRW_SURFACEFORMAT_R8G8B8A8_SNORM,
+ [MESA_FORMAT_SIGNED_R16] = BRW_SURFACEFORMAT_R16_SNORM,
+ [MESA_FORMAT_SIGNED_GR1616] = BRW_SURFACEFORMAT_R16G16_SNORM,
+ [MESA_FORMAT_RGBA_FLOAT32] = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
+ [MESA_FORMAT_RG_FLOAT32] = BRW_SURFACEFORMAT_R32G32_FLOAT,
+ [MESA_FORMAT_R_FLOAT32] = BRW_SURFACEFORMAT_R32_FLOAT,
+ [MESA_FORMAT_INTENSITY_FLOAT32] = BRW_SURFACEFORMAT_I32_FLOAT,
+ [MESA_FORMAT_LUMINANCE_FLOAT32] = BRW_SURFACEFORMAT_L32_FLOAT,
+ [MESA_FORMAT_ALPHA_FLOAT32] = BRW_SURFACEFORMAT_A32_FLOAT,
+ [MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32] = BRW_SURFACEFORMAT_L32A32_FLOAT,
};
bool
brw_render_target_supported(gl_format format)
{
+ /* These are not color render targets like the table holds, but we
+ * ask the question for FBO completeness.
+ */
if (format == MESA_FORMAT_S8_Z24 ||
format == MESA_FORMAT_X8_Z24 ||
format == MESA_FORMAT_Z16) {
return true;
}
+ /* The value of this BRW_SURFACEFORMAT is 0, so hardcode it.
+ */
+ if (format == MESA_FORMAT_RGBA_FLOAT32)
+ return true;
+
/* Not exactly true, as some of those formats are not renderable.
* But at least we know how to translate them.
*/
@@ -155,6 +174,13 @@ static GLuint translate_tex_format( gl_format mesa_format,
return brw_format_for_mesa_format[mesa_format];
else if (srgb_decode == GL_SKIP_DECODE_EXT)
return brw_format_for_mesa_format[_mesa_get_srgb_format_linear(mesa_format)];
+
+ case MESA_FORMAT_RGBA_FLOAT32:
+ /* The value of this BRW_SURFACEFORMAT is 0, which tricks the
+ * assertion below.
+ */
+ return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+
default:
assert(brw_format_for_mesa_format[mesa_format] != 0);
return brw_format_for_mesa_format[mesa_format];
@@ -197,8 +223,9 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit )
surf->ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
surf->ss0.surface_type = translate_tex_target(tObj->Target);
surf->ss0.surface_format = translate_tex_format(firstImage->TexFormat,
- firstImage->InternalFormat,
- tObj->DepthMode, tObj->sRGBDecode);
+ firstImage->InternalFormat,
+ tObj->Sampler.DepthMode,
+ tObj->Sampler.sRGBDecode);
/* This is ok for all textures with channel width 8bit or less:
*/
@@ -425,6 +452,14 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
*/
surf->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
break;
+ case MESA_FORMAT_INTENSITY_FLOAT32:
+ case MESA_FORMAT_LUMINANCE_FLOAT32:
+ /* For these formats, we just need to read/write the first
+ * channel into R, which is to say that we just treat them as
+ * GL_RED.
+ */
+ surf->ss0.surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
+ break;
case MESA_FORMAT_SARGB8:
/* without GL_EXT_framebuffer_sRGB we shouldn't bind sRGB
surfaces to the blend/update as sRGB */
@@ -434,8 +469,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
surf->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
break;
default:
+ assert(brw_render_target_supported(irb->Base.Format));
surf->ss0.surface_format = brw_format_for_mesa_format[irb->Base.Format];
- assert(surf->ss0.surface_format != 0);
}
surf->ss0.surface_type = BRW_SURFACE_2D;
diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c
index d1648a1..1b935fb 100644
--- a/src/mesa/drivers/dri/i965/gen6_cc.c
+++ b/src/mesa/drivers/dri/i965/gen6_cc.c
@@ -210,10 +210,10 @@ color_calc_state_populate_key(struct brw_context *brw,
if (ctx->Color.AlphaEnabled)
key->alpha_ref = ctx->Color.AlphaRef;
- key->blend_constant_color[0] = ctx->Color.BlendColor[0];
- key->blend_constant_color[1] = ctx->Color.BlendColor[1];
- key->blend_constant_color[2] = ctx->Color.BlendColor[2];
- key->blend_constant_color[3] = ctx->Color.BlendColor[3];
+ key->blend_constant_color[0] = ctx->Color.BlendColorUnclamped[0];
+ key->blend_constant_color[1] = ctx->Color.BlendColorUnclamped[1];
+ key->blend_constant_color[2] = ctx->Color.BlendColorUnclamped[2];
+ key->blend_constant_color[3] = ctx->Color.BlendColorUnclamped[3];
}
/**
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c
index 7296c7c..c1d0a73 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c
@@ -44,38 +44,22 @@ upload_gs_state(struct brw_context *brw)
OUT_BATCH(0);
ADVANCE_BATCH();
- if (brw->gs.prog_bo) {
- BEGIN_BATCH(7);
- OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
- OUT_RELOC(brw->gs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
- OUT_BATCH(GEN6_GS_SPF_MODE |
- (0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
- (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
- OUT_BATCH(0); /* scratch space base offset */
- OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
- (brw->gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT) |
- (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
- OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
- GEN6_GS_STATISTICS_ENABLE |
- GEN6_GS_RENDERING_ENABLE);
- OUT_BATCH(GEN6_GS_ENABLE);
- ADVANCE_BATCH();
- } else {
- BEGIN_BATCH(7);
- OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
- OUT_BATCH(0); /* prog_bo */
- OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
- (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
- OUT_BATCH(0); /* scratch space base offset */
- OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
- (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
- (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
- OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
- GEN6_GS_STATISTICS_ENABLE |
- GEN6_GS_RENDERING_ENABLE);
- OUT_BATCH(0);
- ADVANCE_BATCH();
- }
+ // GS should never be used on Gen6. Disable it.
+ assert(brw->gs.prog_bo == NULL);
+ BEGIN_BATCH(7);
+ OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
+ OUT_BATCH(0); /* prog_bo */
+ OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
+ (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+ OUT_BATCH(0); /* scratch space base offset */
+ OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
+ (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
+ (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
+ OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
+ GEN6_GS_STATISTICS_ENABLE |
+ GEN6_GS_RENDERING_ENABLE);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
}
const struct brw_tracked_state gen6_gs_state = {
diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c
index c3819f9..909e1bb 100644
--- a/src/mesa/drivers/dri/i965/gen6_urb.c
+++ b/src/mesa/drivers/dri/i965/gen6_urb.c
@@ -34,26 +34,25 @@
static void
prepare_urb( struct brw_context *brw )
{
- int urb_size, max_urb_entry;
- struct intel_context *intel = &brw->intel;
-
- if (IS_GT1(intel->intelScreen->deviceID)) {
- urb_size = 32 * 1024;
- max_urb_entry = 128;
- } else {
- urb_size = 64 * 1024;
- max_urb_entry = 256;
- }
-
- brw->urb.nr_vs_entries = max_urb_entry;
- brw->urb.nr_gs_entries = max_urb_entry;
+ int nr_vs_entries;
/* CACHE_NEW_VS_PROG */
brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1);
- if (2 * brw->urb.vs_size > urb_size)
- brw->urb.nr_vs_entries = brw->urb.nr_gs_entries =
- (urb_size ) / (2 * brw->urb.vs_size);
+ /* Calculate how many VS URB entries fit in the total URB size */
+ nr_vs_entries = (brw->urb.size * 1024) / (brw->urb.vs_size * 128);
+
+ if (nr_vs_entries > brw->urb.max_vs_handles)
+ nr_vs_entries = brw->urb.max_vs_handles;
+
+ /* According to volume 2a, nr_vs_entries must be a multiple of 4. */
+ brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 4);
+
+ /* Since we currently don't support Geometry Shaders, we always put the
+ * GS unit in passthrough mode and don't allocate it any URB space.
+ */
+ brw->urb.nr_gs_entries = 0;
+ brw->urb.gs_size = 1; /* Incorrect, but with 0 GS entries it doesn't matter. */
}
static void
@@ -61,6 +60,7 @@ upload_urb(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
+ assert(brw->urb.nr_vs_entries >= 24);
assert(brw->urb.nr_vs_entries % 4 == 0);
assert(brw->urb.nr_gs_entries % 4 == 0);
/* GS requirement */
@@ -70,7 +70,7 @@ upload_urb(struct brw_context *brw)
OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2));
OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) |
((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT));
- OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) |
+ OUT_BATCH(((brw->urb.gs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) |
((brw->urb.nr_gs_entries) << GEN6_URB_GS_ENTRIES_SHIFT));
ADVANCE_BATCH();
}
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index ce0b8ea..a10cec3 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -137,7 +137,7 @@ upload_vs_state(struct brw_context *brw)
(brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
(0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
- OUT_BATCH(((60 - 1) << GEN6_VS_MAX_THREADS_SHIFT) | /* max 60 threads for gen6 */
+ OUT_BATCH(((brw->vs_max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT) |
GEN6_VS_STATISTICS_ENABLE |
GEN6_VS_ENABLE);
ADVANCE_BATCH();
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 78901ec..8215cb1 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -144,7 +144,7 @@ upload_wm_state(struct brw_context *brw)
dw4 |= (brw->wm.prog_data->first_curbe_grf <<
GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
- dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT;
+ dw5 |= (brw->wm_max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
/* CACHE_NEW_WM_PROG */
if (brw->wm.prog_data->dispatch_width == 8)
@@ -184,7 +184,12 @@ upload_wm_state(struct brw_context *brw)
OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
OUT_BATCH(dw2);
- OUT_BATCH(0); /* scratch space base offset */
+ if (brw->wm.prog_data->total_scratch) {
+ OUT_RELOC(brw->wm.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ ffs(brw->wm.prog_data->total_scratch) - 11);
+ } else {
+ OUT_BATCH(0);
+ }
OUT_BATCH(dw4);
OUT_BATCH(dw5);
OUT_BATCH(dw6);