summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.cpp4
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp1
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_generator.cpp52
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp38
6 files changed, 63 insertions, 35 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index da6ed5b..a97a944 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -948,6 +948,7 @@ enum opcode {
VS_OPCODE_URB_WRITE,
VS_OPCODE_PULL_CONSTANT_LOAD,
VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
+ VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
VS_OPCODE_UNPACK_FLAGS_SIMD4X2,
/**
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 335a800..0d6ac0c 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -568,6 +568,10 @@ brw_instruction_name(enum opcode op)
return "pull_constant_load";
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
return "pull_constant_load_gen7";
+
+ case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
+ return "set_simd4x2_header_gen9";
+
case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
return "unpack_flags_simd4x2";
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 0363924..a0ee2cc 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -500,6 +500,8 @@ private:
struct brw_reg dst,
struct brw_reg surf_index,
struct brw_reg offset);
+ void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
+ struct brw_reg dst);
void generate_unpack_flags(struct brw_reg dst);
void generate_untyped_atomic(vec4_instruction *inst,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
index 980e266..70d2af5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
@@ -44,6 +44,7 @@ can_do_writemask(const struct brw_context *brw,
case SHADER_OPCODE_GEN4_SCRATCH_READ:
case VS_OPCODE_PULL_CONSTANT_LOAD:
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+ case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
return false;
default:
/* The MATH instruction on Gen6 only executes in align1 mode, which does
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index e4addf7..b22a555 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1039,38 +1039,18 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
{
assert(surf_index.type == BRW_REGISTER_TYPE_UD);
- struct brw_reg src = offset;
- bool header_present = false;
- int mlen = 1;
-
- if (brw->gen >= 9) {
- /* Skylake requires a message header in order to use SIMD4x2 mode. */
- src = retype(brw_vec4_grf(offset.nr - 1, 0), BRW_REGISTER_TYPE_UD);
- mlen = 2;
- header_present = true;
-
- brw_push_insn_state(p);
- brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- brw_MOV(p, vec8(src), retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
- brw_set_default_access_mode(p, BRW_ALIGN_1);
-
- brw_MOV(p, get_element_ud(src, 2),
- brw_imm_ud(GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2));
- brw_pop_insn_state(p);
- }
-
if (surf_index.file == BRW_IMMEDIATE_VALUE) {
brw_inst *insn = brw_next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, insn, dst);
- brw_set_src0(p, insn, src);
+ brw_set_src0(p, insn, offset);
brw_set_sampler_message(p, insn,
surf_index.dw1.ud,
0, /* LD message ignores sampler unit */
GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
1, /* rlen */
- mlen,
- header_present,
+ inst->mlen,
+ inst->header_present,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
0);
@@ -1095,14 +1075,14 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
/* dst = send(offset, a0.0 | <descriptor>) */
brw_inst *insn = brw_send_indirect_message(
- p, BRW_SFID_SAMPLER, dst, src, addr);
+ p, BRW_SFID_SAMPLER, dst, offset, addr);
brw_set_sampler_message(p, insn,
0 /* surface */,
0 /* sampler */,
GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
1 /* rlen */,
- mlen /* mlen */,
- header_present /* header */,
+ inst->mlen,
+ inst->header_present,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
0);
@@ -1113,6 +1093,22 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
}
void
+vec4_generator::generate_set_simd4x2_header_gen9(vec4_instruction *inst,
+ struct brw_reg dst)
+{
+ brw_push_insn_state(p);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+
+ brw_MOV(p, vec8(dst), retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_MOV(p, get_element_ud(dst, 2),
+ brw_imm_ud(GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2));
+
+ brw_pop_insn_state(p);
+}
+
+void
vec4_generator::generate_untyped_atomic(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg atomic_op,
@@ -1435,6 +1431,10 @@ vec4_generator::generate_code(const cfg_t *cfg)
generate_pull_constant_load_gen7(inst, dst, src[0], src[1]);
break;
+ case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
+ generate_set_simd4x2_header_gen9(inst, dst);
+ break;
+
case GS_OPCODE_URB_WRITE:
generate_gs_urb_write(inst);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index f7d542b..3d16caa 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1313,16 +1313,36 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
vec4_instruction *pull;
- if (brw->gen >= 7) {
- dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
+ if (brw->gen >= 9) {
+ /* Gen9+ needs a message header in order to use SIMD4x2 mode */
+ src_reg header(this, glsl_type::uvec4_type, 2);
- /* We have to use a message header on Skylake to get SIMD4x2 mode.
- * Reserve space for the register.
- */
- if (brw->gen >= 9) {
- grf_offset.reg_offset++;
- alloc.sizes[grf_offset.reg] = 2;
- }
+ pull = new(mem_ctx)
+ vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
+ dst_reg(header));
+
+ if (before_inst)
+ emit_before(before_block, before_inst, pull);
+ else
+ emit(pull);
+
+ dst_reg index_reg = retype(offset(dst_reg(header), 1),
+ offset_reg.type);
+ pull = MOV(writemask(index_reg, WRITEMASK_X), offset_reg);
+
+ if (before_inst)
+ emit_before(before_block, before_inst, pull);
+ else
+ emit(pull);
+
+ pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
+ dst,
+ surf_index,
+ header);
+ pull->mlen = 2;
+ pull->header_present = true;
+ } else if (brw->gen >= 7) {
+ dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
grf_offset.type = offset_reg.type;