summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_fs.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs.cpp')
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp784
1 files changed, 506 insertions, 278 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index e218a85..3bec728 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -43,6 +43,7 @@
#include "brw_wm.h"
#include "brw_fs.h"
#include "brw_cs.h"
+#include "brw_vec4_gs_visitor.h"
#include "brw_cfg.h"
#include "brw_dead_control_flow.h"
#include "main/uniforms.h"
@@ -75,8 +76,9 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
/* This will be the case for almost all instructions. */
switch (dst.file) {
- case GRF:
- case HW_REG:
+ case VGRF:
+ case ARF:
+ case FIXED_GRF:
case MRF:
case ATTR:
this->regs_written = DIV_ROUND_UP(dst.component_size(exec_size),
@@ -203,7 +205,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD;
int regs_written = 4 * (bld.dispatch_width() / 8) * scale;
- fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written), dst.type);
+ fs_reg vec4_result = fs_reg(VGRF, alloc.allocate(regs_written), dst.type);
fs_inst *inst = bld.emit(op, vec4_result, surf_index, vec4_offset);
inst->regs_written = regs_written;
@@ -232,7 +234,7 @@ fs_visitor::DEP_RESOLVE_MOV(const fs_builder &bld, int grf)
const fs_builder ubld = bld.annotate("send dependency resolve")
.half(0);
- ubld.MOV(ubld.null_reg_f(), fs_reg(GRF, grf, BRW_REGISTER_TYPE_F));
+ ubld.MOV(ubld.null_reg_f(), fs_reg(VGRF, grf, BRW_REGISTER_TYPE_F));
}
bool
@@ -283,14 +285,15 @@ fs_inst::is_send_from_grf() const
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
case SHADER_OPCODE_URB_READ_SIMD8:
+ case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
return true;
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
- return src[1].file == GRF;
+ return src[1].file == VGRF;
case FS_OPCODE_FB_WRITE:
- return src[0].file == GRF;
+ return src[0].file == VGRF;
default:
if (is_tex())
- return src[0].file == GRF;
+ return src[0].file == VGRF;
return false;
}
@@ -303,10 +306,10 @@ fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const
return false;
fs_reg reg = this->src[0];
- if (reg.file != GRF || reg.reg_offset != 0 || reg.stride == 0)
+ if (reg.file != VGRF || reg.reg_offset != 0 || reg.stride == 0)
return false;
- if (grf_alloc.sizes[reg.reg] != this->regs_written)
+ if (grf_alloc.sizes[reg.nr] != this->regs_written)
return false;
for (int i = 0; i < this->sources; i++) {
@@ -378,7 +381,7 @@ fs_reg::fs_reg(float f)
this->file = IMM;
this->type = BRW_REGISTER_TYPE_F;
this->stride = 0;
- this->fixed_hw_reg.dw1.f = f;
+ this->f = f;
}
/** Immediate value constructor. */
@@ -388,7 +391,7 @@ fs_reg::fs_reg(int32_t i)
this->file = IMM;
this->type = BRW_REGISTER_TYPE_D;
this->stride = 0;
- this->fixed_hw_reg.dw1.d = i;
+ this->d = i;
}
/** Immediate value constructor. */
@@ -398,7 +401,7 @@ fs_reg::fs_reg(uint32_t u)
this->file = IMM;
this->type = BRW_REGISTER_TYPE_UD;
this->stride = 0;
- this->fixed_hw_reg.dw1.ud = u;
+ this->ud = u;
}
/** Vector float immediate value constructor. */
@@ -407,7 +410,7 @@ fs_reg::fs_reg(uint8_t vf[4])
init();
this->file = IMM;
this->type = BRW_REGISTER_TYPE_VF;
- memcpy(&this->fixed_hw_reg.dw1.ud, vf, sizeof(unsigned));
+ memcpy(&this->ud, vf, sizeof(unsigned));
}
/** Vector float immediate value constructor. */
@@ -416,42 +419,38 @@ fs_reg::fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
init();
this->file = IMM;
this->type = BRW_REGISTER_TYPE_VF;
- this->fixed_hw_reg.dw1.ud = (vf0 << 0) |
- (vf1 << 8) |
- (vf2 << 16) |
- (vf3 << 24);
+ this->ud = (vf0 << 0) | (vf1 << 8) | (vf2 << 16) | (vf3 << 24);
}
-/** Fixed brw_reg. */
-fs_reg::fs_reg(struct brw_reg fixed_hw_reg)
+fs_reg::fs_reg(struct brw_reg reg) :
+ backend_reg(reg)
{
- init();
- this->file = HW_REG;
- this->fixed_hw_reg = fixed_hw_reg;
- this->type = fixed_hw_reg.type;
+ this->reg_offset = 0;
+ this->subreg_offset = 0;
+ this->reladdr = NULL;
+ this->stride = 1;
+ if (this->file == IMM &&
+ (this->type != BRW_REGISTER_TYPE_V &&
+ this->type != BRW_REGISTER_TYPE_UV &&
+ this->type != BRW_REGISTER_TYPE_VF)) {
+ this->stride = 0;
+ }
}
bool
fs_reg::equals(const fs_reg &r) const
{
- return (file == r.file &&
- reg == r.reg &&
+ return (memcmp((brw_reg *)this, (brw_reg *)&r, sizeof(brw_reg)) == 0 &&
reg_offset == r.reg_offset &&
subreg_offset == r.subreg_offset &&
- type == r.type &&
- negate == r.negate &&
- abs == r.abs &&
!reladdr && !r.reladdr &&
- ((file != HW_REG && file != IMM) ||
- memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
- sizeof(fixed_hw_reg)) == 0) &&
stride == r.stride);
}
fs_reg &
fs_reg::set_smear(unsigned subreg)
{
- assert(file != HW_REG && file != IMM);
+ assert(file != ARF && file != FIXED_GRF && file != IMM);
subreg_offset = subreg * type_sz(type);
stride = 0;
return *this;
@@ -466,9 +465,9 @@ fs_reg::is_contiguous() const
unsigned
fs_reg::component_size(unsigned width) const
{
- const unsigned stride = (file != HW_REG ? this->stride :
- fixed_hw_reg.hstride == 0 ? 0 :
- 1 << (fixed_hw_reg.hstride - 1));
+ const unsigned stride = ((file != ARF && file != FIXED_GRF) ? this->stride :
+ hstride == 0 ? 0 :
+ 1 << (hstride - 1));
return MAX2(width * stride, 1) * type_sz(type);
}
@@ -514,6 +513,19 @@ type_size_scalar(const struct glsl_type *type)
}
/**
+ * Returns the number of scalar components needed to store type, assuming
+ * that vectors are padded out to vec4.
+ *
+ * This has the packing rules of type_size_vec4(), but counts components
+ * similar to type_size_scalar().
+ */
+extern "C" int
+type_size_vec4_times_4(const struct glsl_type *type)
+{
+ return 4 * type_size_vec4(type);
+}
+
+/**
* Create a MOV to read the timestamp register.
*
* The caller is responsible for emitting the MOV. The return value is
@@ -529,7 +541,7 @@ fs_visitor::get_timestamp(const fs_builder &bld)
0),
BRW_REGISTER_TYPE_UD));
- fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+ fs_reg dst = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
/* We want to read the 3 fields we care about even if it's not enabled in
* the dispatch.
@@ -584,7 +596,7 @@ fs_visitor::emit_shader_time_end()
fs_reg start = shader_start_time;
start.negate = true;
- fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+ fs_reg diff = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
diff.set_smear(0);
const fs_builder cbld = ibld.group(1, 0);
@@ -706,7 +718,7 @@ fs_inst::components_read(unsigned i) const
assert(src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM);
/* First/second FB write color. */
if (i < 2)
- return src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.dw1.ud;
+ return src[FB_WRITE_LOGICAL_SRC_COMPONENTS].ud;
else
return 1;
@@ -717,6 +729,7 @@ fs_inst::components_read(unsigned i) const
case SHADER_OPCODE_TXS_LOGICAL:
case FS_OPCODE_TXB_LOGICAL:
case SHADER_OPCODE_TXF_CMS_LOGICAL:
+ case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
case SHADER_OPCODE_TXF_UMS_LOGICAL:
case SHADER_OPCODE_TXF_MCS_LOGICAL:
case SHADER_OPCODE_LOD_LOGICAL:
@@ -725,13 +738,16 @@ fs_inst::components_read(unsigned i) const
assert(src[8].file == IMM && src[9].file == IMM);
/* Texture coordinates. */
if (i == 0)
- return src[8].fixed_hw_reg.dw1.ud;
+ return src[8].ud;
/* Texture derivatives. */
else if ((i == 2 || i == 3) && opcode == SHADER_OPCODE_TXD_LOGICAL)
- return src[9].fixed_hw_reg.dw1.ud;
+ return src[9].ud;
/* Texture offset. */
else if (i == 7)
return 2;
+ /* MCS */
+ else if (i == 5 && opcode == SHADER_OPCODE_TXF_CMS_W_LOGICAL)
+ return 2;
else
return 1;
@@ -740,7 +756,7 @@ fs_inst::components_read(unsigned i) const
assert(src[3].file == IMM);
/* Surface coordinates. */
if (i == 0)
- return src[3].fixed_hw_reg.dw1.ud;
+ return src[3].ud;
/* Surface operation source (ignored for reads). */
else if (i == 1)
return 0;
@@ -753,10 +769,10 @@ fs_inst::components_read(unsigned i) const
src[4].file == IMM);
/* Surface coordinates. */
if (i == 0)
- return src[3].fixed_hw_reg.dw1.ud;
+ return src[3].ud;
/* Surface operation source. */
else if (i == 1)
- return src[4].fixed_hw_reg.dw1.ud;
+ return src[4].ud;
else
return 1;
@@ -764,10 +780,10 @@ fs_inst::components_read(unsigned i) const
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: {
assert(src[3].file == IMM &&
src[4].file == IMM);
- const unsigned op = src[4].fixed_hw_reg.dw1.ud;
+ const unsigned op = src[4].ud;
/* Surface coordinates. */
if (i == 0)
- return src[3].fixed_hw_reg.dw1.ud;
+ return src[3].ud;
/* Surface operation source. */
else if (i == 1 && op == BRW_AOP_CMPWR)
return 2;
@@ -793,6 +809,7 @@ fs_inst::regs_read(int arg) const
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
case SHADER_OPCODE_URB_READ_SIMD8:
+ case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
case SHADER_OPCODE_UNTYPED_ATOMIC:
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
@@ -825,7 +842,7 @@ fs_inst::regs_read(int arg) const
return 1;
default:
- if (is_tex() && arg == 0 && src[0].file == GRF)
+ if (is_tex() && arg == 0 && src[0].file == VGRF)
return mlen;
break;
}
@@ -836,9 +853,10 @@ fs_inst::regs_read(int arg) const
case UNIFORM:
case IMM:
return 1;
- case GRF:
+ case ARF:
+ case FIXED_GRF:
+ case VGRF:
case ATTR:
- case HW_REG:
return DIV_ROUND_UP(components_read(arg) *
src[arg].component_size(exec_size),
REG_SIZE);
@@ -896,6 +914,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
case SHADER_OPCODE_TXF_CMS:
+ case SHADER_OPCODE_TXF_CMS_W:
case SHADER_OPCODE_TXF_MCS:
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
@@ -938,26 +957,24 @@ fs_reg
fs_visitor::vgrf(const glsl_type *const type)
{
int reg_width = dispatch_width / 8;
- return fs_reg(GRF, alloc.allocate(type_size_scalar(type) * reg_width),
+ return fs_reg(VGRF, alloc.allocate(type_size_scalar(type) * reg_width),
brw_type_for_base_type(type));
}
-/** Fixed HW reg constructor. */
-fs_reg::fs_reg(enum register_file file, int reg)
+fs_reg::fs_reg(enum brw_reg_file file, int nr)
{
init();
this->file = file;
- this->reg = reg;
+ this->nr = nr;
this->type = BRW_REGISTER_TYPE_F;
this->stride = (file == UNIFORM ? 0 : 1);
}
-/** Fixed HW reg constructor. */
-fs_reg::fs_reg(enum register_file file, int reg, enum brw_reg_type type)
+fs_reg::fs_reg(enum brw_reg_file file, int nr, enum brw_reg_type type)
{
init();
this->file = file;
- this->reg = reg;
+ this->nr = nr;
this->type = type;
this->stride = (file == UNIFORM ? 0 : 1);
}
@@ -1285,9 +1302,9 @@ fs_visitor::emit_sampleid_setup()
fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::int_type));
if (key->compute_sample_id) {
- fs_reg t1(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_D);
+ fs_reg t1(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_D);
t1.set_smear(0);
- fs_reg t2(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_W);
+ fs_reg t2(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_W);
/* The PS will be run in MSDISPMODE_PERSAMPLE. For example with
* 8x multisampling, subspan 0 will represent sample N (where N
@@ -1308,9 +1325,15 @@ fs_visitor::emit_sampleid_setup()
* are sample 1 of subspan 0; the third group is sample 0 of
* subspan 1, and finally sample 1 of subspan 1.
*/
+
+ /* SKL+ has an extra bit for the Starting Sample Pair Index to
+ * accomodate 16x MSAA.
+ */
+ unsigned sspi_mask = devinfo->gen >= 9 ? 0x1c0 : 0xc0;
+
abld.exec_all().group(1, 0)
.AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
- fs_reg(0xc0));
+ fs_reg(sspi_mask));
abld.exec_all().group(1, 0).SHR(t1, t1, fs_reg(5));
/* This works for both SIMD8 and SIMD16 */
@@ -1362,6 +1385,57 @@ fs_visitor::emit_discard_jump()
}
void
+fs_visitor::emit_gs_thread_end()
+{
+ assert(stage == MESA_SHADER_GEOMETRY);
+
+ struct brw_gs_prog_data *gs_prog_data =
+ (struct brw_gs_prog_data *) prog_data;
+
+ if (gs_compile->control_data_header_size_bits > 0) {
+ emit_gs_control_data_bits(this->final_gs_vertex_count);
+ }
+
+ const fs_builder abld = bld.annotate("thread end");
+ fs_inst *inst;
+
+ if (gs_prog_data->static_vertex_count != -1) {
+ foreach_in_list_reverse(fs_inst, prev, &this->instructions) {
+ if (prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8 ||
+ prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED ||
+ prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT ||
+ prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT) {
+ prev->eot = true;
+
+ /* Delete now dead instructions. */
+ foreach_in_list_reverse_safe(exec_node, dead, &this->instructions) {
+ if (dead == prev)
+ break;
+ dead->remove();
+ }
+ return;
+ } else if (prev->is_control_flow() || prev->has_side_effects()) {
+ break;
+ }
+ }
+ fs_reg hdr = abld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ abld.MOV(hdr, fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)));
+ inst = abld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, hdr);
+ inst->mlen = 1;
+ } else {
+ fs_reg payload = abld.vgrf(BRW_REGISTER_TYPE_UD, 2);
+ fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 2);
+ sources[0] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
+ sources[1] = this->final_gs_vertex_count;
+ abld.LOAD_PAYLOAD(payload, sources, 2, 2);
+ inst = abld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
+ inst->mlen = 2;
+ }
+ inst->eot = true;
+ inst->offset = 0;
+}
+
+void
fs_visitor::assign_curb_setup()
{
if (dispatch_width == 8) {
@@ -1384,7 +1458,7 @@ fs_visitor::assign_curb_setup()
foreach_block_and_inst(block, fs_inst, inst, cfg) {
for (unsigned int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == UNIFORM) {
- int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset;
+ int uniform_nr = inst->src[i].nr + inst->src[i].reg_offset;
int constant_nr;
if (uniform_nr >= 0 && uniform_nr < (int) uniforms) {
constant_nr = push_constant_loc[uniform_nr];
@@ -1400,10 +1474,11 @@ fs_visitor::assign_curb_setup()
struct brw_reg brw_reg = brw_vec1_grf(payload.num_regs +
constant_nr / 8,
constant_nr % 8);
+ brw_reg.abs = inst->src[i].abs;
+ brw_reg.negate = inst->src[i].negate;
assert(inst->src[i].stride == 0);
- inst->src[i].file = HW_REG;
- inst->src[i].fixed_hw_reg = byte_offset(
+ inst->src[i] = byte_offset(
retype(brw_reg, inst->src[i].type),
inst->src[i].subreg_offset);
}
@@ -1518,13 +1593,13 @@ fs_visitor::assign_urb_setup()
*/
foreach_block_and_inst(block, fs_inst, inst, cfg) {
if (inst->opcode == FS_OPCODE_LINTERP) {
- assert(inst->src[1].file == HW_REG);
- inst->src[1].fixed_hw_reg.nr += urb_start;
+ assert(inst->src[1].file == FIXED_GRF);
+ inst->src[1].nr += urb_start;
}
if (inst->opcode == FS_OPCODE_CINTERP) {
- assert(inst->src[0].file == HW_REG);
- inst->src[0].fixed_hw_reg.nr += urb_start;
+ assert(inst->src[0].file == FIXED_GRF);
+ inst->src[0].nr += urb_start;
}
}
@@ -1533,6 +1608,30 @@ fs_visitor::assign_urb_setup()
}
void
+fs_visitor::convert_attr_sources_to_hw_regs(fs_inst *inst)
+{
+ for (int i = 0; i < inst->sources; i++) {
+ if (inst->src[i].file == ATTR) {
+ int grf = payload.num_regs +
+ prog_data->curb_read_length +
+ inst->src[i].nr +
+ inst->src[i].reg_offset;
+
+ unsigned width = inst->src[i].stride == 0 ? 1 : inst->exec_size;
+ struct brw_reg reg =
+ stride(byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
+ inst->src[i].subreg_offset),
+ inst->exec_size * inst->src[i].stride,
+ width, inst->src[i].stride);
+ reg.abs = inst->src[i].abs;
+ reg.negate = inst->src[i].negate;
+
+ inst->src[i] = reg;
+ }
+ }
+}
+
+void
fs_visitor::assign_vs_urb_setup()
{
brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;
@@ -1549,24 +1648,44 @@ fs_visitor::assign_vs_urb_setup()
/* Rewrite all ATTR file references to the hw grf that they land in. */
foreach_block_and_inst(block, fs_inst, inst, cfg) {
- for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == ATTR) {
- int grf = payload.num_regs +
- prog_data->curb_read_length +
- inst->src[i].reg +
- inst->src[i].reg_offset;
-
- inst->src[i].file = HW_REG;
- inst->src[i].fixed_hw_reg =
- stride(byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
- inst->src[i].subreg_offset),
- inst->exec_size * inst->src[i].stride,
- inst->exec_size, inst->src[i].stride);
- }
+ convert_attr_sources_to_hw_regs(inst);
+ }
+}
+
+void
+fs_visitor::assign_gs_urb_setup()
+{
+ assert(stage == MESA_SHADER_GEOMETRY);
+
+ brw_vue_prog_data *vue_prog_data = (brw_vue_prog_data *) prog_data;
+
+ first_non_payload_grf +=
+ 8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in;
+
+ const unsigned first_icp_handle = payload.num_regs -
+ (vue_prog_data->include_vue_handles ? nir->info.gs.vertices_in : 0);
+
+ foreach_block_and_inst(block, fs_inst, inst, cfg) {
+ /* Lower URB_READ_SIMD8 opcodes into real messages. */
+ if (inst->opcode == SHADER_OPCODE_URB_READ_SIMD8) {
+ assert(inst->src[0].file == IMM);
+ inst->src[0] = retype(brw_vec8_grf(first_icp_handle +
+ inst->src[0].ud,
+ 0), BRW_REGISTER_TYPE_UD);
+ /* for now, assume constant - we can do per-slot offsets later */
+ assert(inst->src[1].file == IMM);
+ inst->offset = inst->src[1].ud;
+ inst->src[1] = fs_reg();
+ inst->mlen = 1;
+ inst->base_mrf = -1;
}
+
+ /* Rewrite all ATTR file references to GRFs. */
+ convert_attr_sources_to_hw_regs(inst);
}
}
+
/**
* Split large virtual GRFs into separate components if we can.
*
@@ -1609,30 +1728,30 @@ fs_visitor::split_virtual_grfs()
/* Mark all used registers as fully splittable */
foreach_block_and_inst(block, fs_inst, inst, cfg) {
- if (inst->dst.file == GRF) {
- int reg = vgrf_to_reg[inst->dst.reg];
- for (unsigned j = 1; j < this->alloc.sizes[inst->dst.reg]; j++)
+ if (inst->dst.file == VGRF) {
+ int reg = vgrf_to_reg[inst->dst.nr];
+ for (unsigned j = 1; j < this->alloc.sizes[inst->dst.nr]; j++)
split_points[reg + j] = true;
}
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == GRF) {
- int reg = vgrf_to_reg[inst->src[i].reg];
- for (unsigned j = 1; j < this->alloc.sizes[inst->src[i].reg]; j++)
+ if (inst->src[i].file == VGRF) {
+ int reg = vgrf_to_reg[inst->src[i].nr];
+ for (unsigned j = 1; j < this->alloc.sizes[inst->src[i].nr]; j++)
split_points[reg + j] = true;
}
}
}
foreach_block_and_inst(block, fs_inst, inst, cfg) {
- if (inst->dst.file == GRF) {
- int reg = vgrf_to_reg[inst->dst.reg] + inst->dst.reg_offset;
+ if (inst->dst.file == VGRF) {
+ int reg = vgrf_to_reg[inst->dst.nr] + inst->dst.reg_offset;
for (int j = 1; j < inst->regs_written; j++)
split_points[reg + j] = false;
}
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == GRF) {
- int reg = vgrf_to_reg[inst->src[i].reg] + inst->src[i].reg_offset;
+ if (inst->src[i].file == VGRF) {
+ int reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].reg_offset;
for (int j = 1; j < inst->regs_read(i); j++)
split_points[reg + j] = false;
}
@@ -1678,16 +1797,16 @@ fs_visitor::split_virtual_grfs()
assert(reg == reg_count);
foreach_block_and_inst(block, fs_inst, inst, cfg) {
- if (inst->dst.file == GRF) {
- reg = vgrf_to_reg[inst->dst.reg] + inst->dst.reg_offset;
- inst->dst.reg = new_virtual_grf[reg];
+ if (inst->dst.file == VGRF) {
+ reg = vgrf_to_reg[inst->dst.nr] + inst->dst.reg_offset;
+ inst->dst.nr = new_virtual_grf[reg];
inst->dst.reg_offset = new_reg_offset[reg];
assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]);
}
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == GRF) {
- reg = vgrf_to_reg[inst->src[i].reg] + inst->src[i].reg_offset;
- inst->src[i].reg = new_virtual_grf[reg];
+ if (inst->src[i].file == VGRF) {
+ reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].reg_offset;
+ inst->src[i].nr = new_virtual_grf[reg];
inst->src[i].reg_offset = new_reg_offset[reg];
assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]);
}
@@ -1714,12 +1833,12 @@ fs_visitor::compact_virtual_grfs()
/* Mark which virtual GRFs are used. */
foreach_block_and_inst(block, const fs_inst, inst, cfg) {
- if (inst->dst.file == GRF)
- remap_table[inst->dst.reg] = 0;
+ if (inst->dst.file == VGRF)
+ remap_table[inst->dst.nr] = 0;
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == GRF)
- remap_table[inst->src[i].reg] = 0;
+ if (inst->src[i].file == VGRF)
+ remap_table[inst->src[i].nr] = 0;
}
}
@@ -1743,12 +1862,12 @@ fs_visitor::compact_virtual_grfs()
/* Patch all the instructions to use the newly renumbered registers */
foreach_block_and_inst(block, fs_inst, inst, cfg) {
- if (inst->dst.file == GRF)
- inst->dst.reg = remap_table[inst->dst.reg];
+ if (inst->dst.file == VGRF)
+ inst->dst.nr = remap_table[inst->dst.nr];
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == GRF)
- inst->src[i].reg = remap_table[inst->src[i].reg];
+ if (inst->src[i].file == VGRF)
+ inst->src[i].nr = remap_table[inst->src[i].nr];
}
}
@@ -1757,9 +1876,9 @@ fs_visitor::compact_virtual_grfs()
* think some random VGRF is delta_xy.
*/
for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
- if (delta_xy[i].file == GRF) {
- if (remap_table[delta_xy[i].reg] != -1) {
- delta_xy[i].reg = remap_table[delta_xy[i].reg];
+ if (delta_xy[i].file == VGRF) {
+ if (remap_table[delta_xy[i].nr] != -1) {
+ delta_xy[i].nr = remap_table[delta_xy[i].nr];
} else {
delta_xy[i].file = BAD_FILE;
}
@@ -1811,7 +1930,7 @@ fs_visitor::assign_constant_locations()
continue;
if (inst->src[i].reladdr) {
- int uniform = inst->src[i].reg;
+ int uniform = inst->src[i].nr;
/* If this array isn't already present in the pull constant buffer,
* add it.
@@ -1823,7 +1942,7 @@ fs_visitor::assign_constant_locations()
}
} else {
/* Mark the the one accessed uniform as live */
- int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
+ int constant_nr = inst->src[i].nr + inst->src[i].reg_offset;
if (constant_nr >= 0 && constant_nr < (int) uniforms)
is_live[constant_nr] = true;
}
@@ -1899,7 +2018,7 @@ fs_visitor::demote_pull_constants()
continue;
int pull_index;
- unsigned location = inst->src[i].reg + inst->src[i].reg_offset;
+ unsigned location = inst->src[i].nr + inst->src[i].reg_offset;
if (location >= uniforms) /* Out of bounds access */
pull_index = -1;
else
@@ -1910,7 +2029,7 @@ fs_visitor::demote_pull_constants()
/* Set up the annotation tracking for new generated instructions. */
const fs_builder ibld(this, block, inst);
- fs_reg surf_index(stage_prog_data->binding_table.pull_constants_start);
+ const unsigned index = stage_prog_data->binding_table.pull_constants_start;
fs_reg dst = vgrf(glsl_type::float_type);
assert(inst->src[i].stride == 0);
@@ -1918,7 +2037,7 @@ fs_visitor::demote_pull_constants()
/* Generate a pull load into dst. */
if (inst->src[i].reladdr) {
VARYING_PULL_CONSTANT_LOAD(ibld, dst,
- surf_index,
+ fs_reg(index),
*inst->src[i].reladdr,
pull_index);
inst->src[i].reladdr = NULL;
@@ -1927,13 +2046,14 @@ fs_visitor::demote_pull_constants()
const fs_builder ubld = ibld.exec_all().group(8, 0);
fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15);
ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
- dst, surf_index, offset);
+ dst, fs_reg(index), offset);
inst->src[i].set_smear(pull_index & 3);
}
+ brw_mark_surface_used(prog_data, index);
/* Rewrite the instruction to use the temporary VGRF. */
- inst->src[i].file = GRF;
- inst->src[i].reg = dst.reg;
+ inst->src[i].file = VGRF;
+ inst->src[i].nr = dst.nr;
inst->src[i].reg_offset = 0;
}
}
@@ -1955,8 +2075,7 @@ fs_visitor::opt_algebraic()
if (inst->dst.type != inst->src[0].type)
assert(!"unimplemented: saturate mixed types");
- if (brw_saturate_immediate(inst->dst.type,
- &inst->src[0].fixed_hw_reg)) {
+ if (brw_saturate_immediate(inst->dst.type, &inst->src[0])) {
inst->saturate = false;
progress = true;
}
@@ -1996,7 +2115,7 @@ fs_visitor::opt_algebraic()
if (inst->src[0].file == IMM) {
assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
inst->opcode = BRW_OPCODE_MOV;
- inst->src[0].fixed_hw_reg.dw1.f *= inst->src[1].fixed_hw_reg.dw1.f;
+ inst->src[0].f *= inst->src[1].f;
inst->src[1] = reg_undef;
progress = true;
break;
@@ -2017,7 +2136,7 @@ fs_visitor::opt_algebraic()
if (inst->src[0].file == IMM) {
assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
inst->opcode = BRW_OPCODE_MOV;
- inst->src[0].fixed_hw_reg.dw1.f += inst->src[1].fixed_hw_reg.dw1.f;
+ inst->src[0].f += inst->src[1].f;
inst->src[1] = reg_undef;
progress = true;
break;
@@ -2066,7 +2185,7 @@ fs_visitor::opt_algebraic()
case BRW_CONDITIONAL_L:
switch (inst->src[1].type) {
case BRW_REGISTER_TYPE_F:
- if (inst->src[1].fixed_hw_reg.dw1.f >= 1.0f) {
+ if (inst->src[1].f >= 1.0f) {
inst->opcode = BRW_OPCODE_MOV;
inst->src[1] = reg_undef;
inst->conditional_mod = BRW_CONDITIONAL_NONE;
@@ -2081,7 +2200,7 @@ fs_visitor::opt_algebraic()
case BRW_CONDITIONAL_G:
switch (inst->src[1].type) {
case BRW_REGISTER_TYPE_F:
- if (inst->src[1].fixed_hw_reg.dw1.f <= 0.0f) {
+ if (inst->src[1].f <= 0.0f) {
inst->opcode = BRW_OPCODE_MOV;
inst->src[1] = reg_undef;
inst->conditional_mod = BRW_CONDITIONAL_NONE;
@@ -2118,7 +2237,7 @@ fs_visitor::opt_algebraic()
progress = true;
} else if (inst->src[1].file == IMM && inst->src[2].file == IMM) {
inst->opcode = BRW_OPCODE_ADD;
- inst->src[1].fixed_hw_reg.dw1.f *= inst->src[2].fixed_hw_reg.dw1.f;
+ inst->src[1].f *= inst->src[2].f;
inst->src[2] = reg_undef;
progress = true;
}
@@ -2143,7 +2262,7 @@ fs_visitor::opt_algebraic()
} else if (inst->src[1].file == IMM) {
inst->opcode = BRW_OPCODE_MOV;
inst->src[0] = component(inst->src[0],
- inst->src[1].fixed_hw_reg.dw1.ud);
+ inst->src[1].ud);
inst->sources = 1;
inst->force_writemask_all = true;
progress = true;
@@ -2344,31 +2463,31 @@ fs_visitor::opt_register_renaming()
/* Rewrite instruction sources. */
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == GRF &&
- remap[inst->src[i].reg] != -1 &&
- remap[inst->src[i].reg] != inst->src[i].reg) {
- inst->src[i].reg = remap[inst->src[i].reg];
+ if (inst->src[i].file == VGRF &&
+ remap[inst->src[i].nr] != -1 &&
+ remap[inst->src[i].nr] != inst->src[i].nr) {
+ inst->src[i].nr = remap[inst->src[i].nr];
progress = true;
}
}
- const int dst = inst->dst.reg;
+ const int dst = inst->dst.nr;
if (depth == 0 &&
- inst->dst.file == GRF &&
- alloc.sizes[inst->dst.reg] == inst->exec_size / 8 &&
+ inst->dst.file == VGRF &&
+ alloc.sizes[inst->dst.nr] == inst->exec_size / 8 &&
!inst->is_partial_write()) {
if (remap[dst] == -1) {
remap[dst] = dst;
} else {
remap[dst] = alloc.allocate(inst->exec_size / 8);
- inst->dst.reg = remap[dst];
+ inst->dst.nr = remap[dst];
progress = true;
}
- } else if (inst->dst.file == GRF &&
+ } else if (inst->dst.file == VGRF &&
remap[dst] != -1 &&
remap[dst] != dst) {
- inst->dst.reg = remap[dst];
+ inst->dst.nr = remap[dst];
progress = true;
}
}
@@ -2377,8 +2496,8 @@ fs_visitor::opt_register_renaming()
invalidate_live_intervals();
for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
- if (delta_xy[i].file == GRF && remap[delta_xy[i].reg] != -1) {
- delta_xy[i].reg = remap[delta_xy[i].reg];
+ if (delta_xy[i].file == VGRF && remap[delta_xy[i].nr] != -1) {
+ delta_xy[i].nr = remap[delta_xy[i].nr];
}
}
}
@@ -2445,7 +2564,7 @@ fs_visitor::compute_to_mrf()
if (inst->opcode != BRW_OPCODE_MOV ||
inst->is_partial_write() ||
- inst->dst.file != MRF || inst->src[0].file != GRF ||
+ inst->dst.file != MRF || inst->src[0].file != VGRF ||
inst->dst.type != inst->src[0].type ||
inst->src[0].abs || inst->src[0].negate ||
!inst->src[0].is_contiguous() ||
@@ -2455,9 +2574,9 @@ fs_visitor::compute_to_mrf()
/* Work out which hardware MRF registers are written by this
* instruction.
*/
- int mrf_low = inst->dst.reg & ~BRW_MRF_COMPR4;
+ int mrf_low = inst->dst.nr & ~BRW_MRF_COMPR4;
int mrf_high;
- if (inst->dst.reg & BRW_MRF_COMPR4) {
+ if (inst->dst.nr & BRW_MRF_COMPR4) {
mrf_high = mrf_low + 4;
} else if (inst->exec_size == 16) {
mrf_high = mrf_low + 1;
@@ -2468,15 +2587,15 @@ fs_visitor::compute_to_mrf()
/* Can't compute-to-MRF this GRF if someone else was going to
* read it later.
*/
- if (this->virtual_grf_end[inst->src[0].reg] > ip)
+ if (this->virtual_grf_end[inst->src[0].nr] > ip)
continue;
/* Found a move of a GRF to a MRF. Let's see if we can go
* rewrite the thing that made this GRF to write into the MRF.
*/
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
- if (scan_inst->dst.file == GRF &&
- scan_inst->dst.reg == inst->src[0].reg) {
+ if (scan_inst->dst.file == VGRF &&
+ scan_inst->dst.nr == inst->src[0].nr) {
/* Found the last thing to write our reg we want to turn
* into a compute-to-MRF.
*/
@@ -2511,7 +2630,7 @@ fs_visitor::compute_to_mrf()
if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
/* Found the creator of our MRF's source value. */
scan_inst->dst.file = MRF;
- scan_inst->dst.reg = inst->dst.reg;
+ scan_inst->dst.nr = inst->dst.nr;
scan_inst->saturate |= inst->saturate;
inst->remove(block);
progress = true;
@@ -2531,8 +2650,8 @@ fs_visitor::compute_to_mrf()
*/
bool interfered = false;
for (int i = 0; i < scan_inst->sources; i++) {
- if (scan_inst->src[i].file == GRF &&
- scan_inst->src[i].reg == inst->src[0].reg &&
+ if (scan_inst->src[i].file == VGRF &&
+ scan_inst->src[i].nr == inst->src[0].nr &&
scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
interfered = true;
}
@@ -2544,10 +2663,10 @@ fs_visitor::compute_to_mrf()
/* If somebody else writes our MRF here, we can't
* compute-to-MRF before that.
*/
- int scan_mrf_low = scan_inst->dst.reg & ~BRW_MRF_COMPR4;
+ int scan_mrf_low = scan_inst->dst.nr & ~BRW_MRF_COMPR4;
int scan_mrf_high;
- if (scan_inst->dst.reg & BRW_MRF_COMPR4) {
+ if (scan_inst->dst.nr & BRW_MRF_COMPR4) {
scan_mrf_high = scan_mrf_low + 4;
} else if (scan_inst->exec_size == 16) {
scan_mrf_high = scan_mrf_low + 1;
@@ -2690,8 +2809,8 @@ fs_visitor::emit_repclear_shader()
/* Now that we have the uniform assigned, go ahead and force it to a vec4. */
if (uniforms == 1) {
- assert(mov->src[0].file == HW_REG);
- mov->src[0] = brw_vec4_grf(mov->src[0].fixed_hw_reg.nr, 0);
+ assert(mov->src[0].file == FIXED_GRF);
+ mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0);
}
}
@@ -2718,7 +2837,7 @@ fs_visitor::remove_duplicate_mrf_writes()
if (inst->opcode == BRW_OPCODE_MOV &&
inst->dst.file == MRF) {
- fs_inst *prev_inst = last_mrf_move[inst->dst.reg];
+ fs_inst *prev_inst = last_mrf_move[inst->dst.nr];
if (prev_inst && inst->equals(prev_inst)) {
inst->remove(block);
progress = true;
@@ -2728,7 +2847,7 @@ fs_visitor::remove_duplicate_mrf_writes()
/* Clear out the last-write records for MRFs that were overwritten. */
if (inst->dst.file == MRF) {
- last_mrf_move[inst->dst.reg] = NULL;
+ last_mrf_move[inst->dst.nr] = NULL;
}
if (inst->mlen > 0 && inst->base_mrf != -1) {
@@ -2741,10 +2860,10 @@ fs_visitor::remove_duplicate_mrf_writes()
}
/* Clear out any MRF move records whose sources got overwritten. */
- if (inst->dst.file == GRF) {
+ if (inst->dst.file == VGRF) {
for (unsigned int i = 0; i < ARRAY_SIZE(last_mrf_move); i++) {
if (last_mrf_move[i] &&
- last_mrf_move[i]->src[0].reg == inst->dst.reg) {
+ last_mrf_move[i]->src[0].nr == inst->dst.nr) {
last_mrf_move[i] = NULL;
}
}
@@ -2752,9 +2871,9 @@ fs_visitor::remove_duplicate_mrf_writes()
if (inst->opcode == BRW_OPCODE_MOV &&
inst->dst.file == MRF &&
- inst->src[0].file == GRF &&
+ inst->src[0].file == VGRF &&
!inst->is_partial_write()) {
- last_mrf_move[inst->dst.reg] = inst;
+ last_mrf_move[inst->dst.nr] = inst;
}
}
@@ -2770,11 +2889,8 @@ clear_deps_for_inst_src(fs_inst *inst, bool *deps, int first_grf, int grf_len)
/* Clear the flag for registers that actually got read (as expected). */
for (int i = 0; i < inst->sources; i++) {
int grf;
- if (inst->src[i].file == GRF) {
- grf = inst->src[i].reg;
- } else if (inst->src[i].file == HW_REG &&
- inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
- grf = inst->src[i].fixed_hw_reg.nr;
+ if (inst->src[i].file == VGRF || inst->src[i].file == FIXED_GRF) {
+ grf = inst->src[i].nr;
} else {
continue;
}
@@ -2809,7 +2925,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
fs_inst *inst)
{
int write_len = inst->regs_written;
- int first_write_grf = inst->dst.reg;
+ int first_write_grf = inst->dst.nr;
bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
assert(write_len < (int)sizeof(needs_dep) - 1);
@@ -2840,9 +2956,9 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
* instruction but a MOV that might have left us an outstanding
* dependency has more latency than a MOV.
*/
- if (scan_inst->dst.file == GRF) {
+ if (scan_inst->dst.file == VGRF) {
for (int i = 0; i < scan_inst->regs_written; i++) {
- int reg = scan_inst->dst.reg + i;
+ int reg = scan_inst->dst.nr + i;
if (reg >= first_write_grf &&
reg < first_write_grf + write_len &&
@@ -2880,7 +2996,7 @@ void
fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_inst *inst)
{
int write_len = inst->regs_written;
- int first_write_grf = inst->dst.reg;
+ int first_write_grf = inst->dst.nr;
bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
assert(write_len < (int)sizeof(needs_dep) - 1);
@@ -2906,13 +3022,13 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins
/* We insert our reads as late as possible since they're reading the
* result of a SEND, which has massive latency.
*/
- if (scan_inst->dst.file == GRF &&
- scan_inst->dst.reg >= first_write_grf &&
- scan_inst->dst.reg < first_write_grf + write_len &&
- needs_dep[scan_inst->dst.reg - first_write_grf]) {
+ if (scan_inst->dst.file == VGRF &&
+ scan_inst->dst.nr >= first_write_grf &&
+ scan_inst->dst.nr < first_write_grf + write_len &&
+ needs_dep[scan_inst->dst.nr - first_write_grf]) {
DEP_RESOLVE_MOV(fs_builder(this, block, scan_inst),
- scan_inst->dst.reg);
- needs_dep[scan_inst->dst.reg - first_write_grf] = false;
+ scan_inst->dst.nr);
+ needs_dep[scan_inst->dst.nr - first_write_grf] = false;
}
/* Continue the loop only if we haven't resolved all the dependencies */
@@ -2939,7 +3055,7 @@ fs_visitor::insert_gen4_send_dependency_workarounds()
*/
foreach_block_and_inst(block, fs_inst, inst, cfg) {
- if (inst->mlen != 0 && inst->dst.file == GRF) {
+ if (inst->mlen != 0 && inst->dst.file == VGRF) {
insert_gen4_pre_send_dependency_workarounds(block, inst);
insert_gen4_post_send_dependency_workarounds(block, inst);
progress = true;
@@ -2980,18 +3096,18 @@ fs_visitor::lower_uniform_pull_constant_loads()
fs_reg const_offset_reg = inst->src[1];
assert(const_offset_reg.file == IMM &&
const_offset_reg.type == BRW_REGISTER_TYPE_UD);
- const_offset_reg.fixed_hw_reg.dw1.ud /= 4;
+ const_offset_reg.ud /= 4;
fs_reg payload, offset;
if (devinfo->gen >= 9) {
/* We have to use a message header on Skylake to get SIMD4x2
* mode. Reserve space for the register.
*/
- offset = payload = fs_reg(GRF, alloc.allocate(2));
+ offset = payload = fs_reg(VGRF, alloc.allocate(2));
offset.reg_offset++;
inst->mlen = 2;
} else {
- offset = payload = fs_reg(GRF, alloc.allocate(1));
+ offset = payload = fs_reg(VGRF, alloc.allocate(1));
inst->mlen = 1;
}
@@ -3038,13 +3154,13 @@ fs_visitor::lower_load_payload()
if (inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD)
continue;
- assert(inst->dst.file == MRF || inst->dst.file == GRF);
+ assert(inst->dst.file == MRF || inst->dst.file == VGRF);
assert(inst->saturate == false);
fs_reg dst = inst->dst;
/* Get rid of COMPR4. We'll add it back in if we need it */
if (dst.file == MRF)
- dst.reg = dst.reg & ~BRW_MRF_COMPR4;
+ dst.nr = dst.nr & ~BRW_MRF_COMPR4;
const fs_builder ibld(this, block, inst);
const fs_builder hbld = ibld.exec_all().group(8, 0);
@@ -3058,7 +3174,7 @@ fs_visitor::lower_load_payload()
dst = offset(dst, hbld, 1);
}
- if (inst->dst.file == MRF && (inst->dst.reg & BRW_MRF_COMPR4) &&
+ if (inst->dst.file == MRF && (inst->dst.nr & BRW_MRF_COMPR4) &&
inst->exec_size > 8) {
/* In this case, the payload portion of the LOAD_PAYLOAD isn't
* a straightforward copy. Instead, the result of the
@@ -3082,18 +3198,18 @@ fs_visitor::lower_load_payload()
if (inst->src[i].file != BAD_FILE) {
if (devinfo->has_compr4) {
fs_reg compr4_dst = retype(dst, inst->src[i].type);
- compr4_dst.reg |= BRW_MRF_COMPR4;
+ compr4_dst.nr |= BRW_MRF_COMPR4;
ibld.MOV(compr4_dst, inst->src[i]);
} else {
/* Platform doesn't have COMPR4. We have to fake it */
fs_reg mov_dst = retype(dst, inst->src[i].type);
ibld.half(0).MOV(mov_dst, half(inst->src[i], 0));
- mov_dst.reg += 4;
+ mov_dst.nr += 4;
ibld.half(1).MOV(mov_dst, half(inst->src[i], 1));
}
}
- dst.reg++;
+ dst.nr++;
}
/* The loop above only ever incremented us through the first set
@@ -3101,7 +3217,7 @@ fs_visitor::lower_load_payload()
* actually wrote to the first 8 registers, so we need to take
* that into account now.
*/
- dst.reg += 4;
+ dst.nr += 4;
/* The COMPR4 code took care of the first 4 sources. We'll let
* the regular path handle any remaining sources. Yes, we are
@@ -3149,7 +3265,7 @@ fs_visitor::lower_integer_multiplication()
continue;
if (inst->src[1].file == IMM &&
- inst->src[1].fixed_hw_reg.dw1.ud < (1 << 16)) {
+ inst->src[1].ud < (1 << 16)) {
/* The MUL instruction isn't commutative. On Gen <= 6, only the low
* 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of
* src1 are used.
@@ -3158,7 +3274,7 @@ fs_visitor::lower_integer_multiplication()
* single MUL instruction with that value in the proper location.
*/
if (devinfo->gen < 7) {
- fs_reg imm(GRF, alloc.allocate(dispatch_width / 8),
+ fs_reg imm(VGRF, alloc.allocate(dispatch_width / 8),
inst->dst.type);
ibld.MOV(imm, inst->src[1]);
ibld.MUL(inst->dst, imm, inst->src[0]);
@@ -3213,11 +3329,11 @@ fs_visitor::lower_integer_multiplication()
fs_reg orig_dst = inst->dst;
if (orig_dst.is_null() || orig_dst.file == MRF) {
- inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
+ inst->dst = fs_reg(VGRF, alloc.allocate(dispatch_width / 8),
inst->dst.type);
}
fs_reg low = inst->dst;
- fs_reg high(GRF, alloc.allocate(dispatch_width / 8),
+ fs_reg high(VGRF, alloc.allocate(dispatch_width / 8),
inst->dst.type);
if (devinfo->gen >= 7) {
@@ -3225,8 +3341,8 @@ fs_visitor::lower_integer_multiplication()
fs_reg src1_1_w = inst->src[1];
if (inst->src[1].file == IMM) {
- src1_0_w.fixed_hw_reg.dw1.ud &= 0xffff;
- src1_1_w.fixed_hw_reg.dw1.ud >>= 16;
+ src1_0_w.ud &= 0xffff;
+ src1_1_w.ud >>= 16;
} else {
src1_0_w.type = BRW_REGISTER_TYPE_UW;
if (src1_0_w.stride != 0) {
@@ -3381,7 +3497,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
const fs_reg &src_stencil = inst->src[FB_WRITE_LOGICAL_SRC_SRC_STENCIL];
fs_reg sample_mask = inst->src[FB_WRITE_LOGICAL_SRC_OMASK];
const unsigned components =
- inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.dw1.ud;
+ inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].ud;
/* We can potentially have a message length of up to 15, so we have to set
* base_mrf to either 0 or 1 in order to fit in m0..m15.
@@ -3411,7 +3527,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
}
if (payload.aa_dest_stencil_reg) {
- sources[length] = fs_reg(GRF, bld.shader->alloc.allocate(1));
+ sources[length] = fs_reg(VGRF, bld.shader->alloc.allocate(1));
bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha")
.MOV(sources[length],
fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0)));
@@ -3419,7 +3535,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
}
if (prog_data->uses_omask) {
- sources[length] = fs_reg(GRF, bld.shader->alloc.allocate(1),
+ sources[length] = fs_reg(VGRF, bld.shader->alloc.allocate(1),
BRW_REGISTER_TYPE_UD);
/* Hand over gl_SampleMask. Only the lower 16 bits of each channel are
@@ -3485,9 +3601,9 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
fs_inst *load;
if (devinfo->gen >= 7) {
/* Send from the GRF */
- fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F);
+ fs_reg payload = fs_reg(VGRF, -1, BRW_REGISTER_TYPE_F);
load = bld.LOAD_PAYLOAD(payload, sources, length, payload_header_size);
- payload.reg = bld.shader->alloc.allocate(load->regs_written);
+ payload.nr = bld.shader->alloc.allocate(load->regs_written);
load->dst = payload;
inst->src[0] = payload;
@@ -3502,7 +3618,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
* will do this for us if we just give it a COMPR4 destination.
*/
if (devinfo->gen < 6 && bld.dispatch_width() == 16)
- load->dst.reg |= BRW_MRF_COMPR4;
+ load->dst.nr |= BRW_MRF_COMPR4;
inst->resize_sources(0);
inst->base_mrf = 1;
@@ -3612,8 +3728,8 @@ lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op,
inst->src[0] = reg_undef;
inst->src[1] = sampler;
inst->resize_sources(2);
- inst->base_mrf = msg_begin.reg;
- inst->mlen = msg_end.reg - msg_begin.reg;
+ inst->base_mrf = msg_begin.nr;
+ inst->mlen = msg_end.nr - msg_begin.nr;
inst->header_size = 1;
}
@@ -3637,7 +3753,7 @@ lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op,
* go headerless.
*/
header_size = 1;
- message.reg--;
+ message.nr--;
}
for (unsigned i = 0; i < coord_components; i++) {
@@ -3707,8 +3823,8 @@ lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op,
inst->src[0] = reg_undef;
inst->src[1] = sampler;
inst->resize_sources(2);
- inst->base_mrf = message.reg;
- inst->mlen = msg_end.reg - message.reg;
+ inst->base_mrf = message.nr;
+ inst->mlen = msg_end.nr - message.nr;
inst->header_size = header_size;
/* Message length > MAX_SAMPLER_MESSAGE_SIZE disallowed by hardware. */
@@ -3721,7 +3837,7 @@ is_high_sampler(const struct brw_device_info *devinfo, const fs_reg &sampler)
if (devinfo->gen < 8 && !devinfo->is_haswell)
return false;
- return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16;
+ return sampler.file != IMM || sampler.ud >= 16;
}
static void
@@ -3844,17 +3960,31 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
coordinate_done = true;
break;
case SHADER_OPCODE_TXF_CMS:
+ case SHADER_OPCODE_TXF_CMS_W:
case SHADER_OPCODE_TXF_UMS:
case SHADER_OPCODE_TXF_MCS:
- if (op == SHADER_OPCODE_TXF_UMS || op == SHADER_OPCODE_TXF_CMS) {
+ if (op == SHADER_OPCODE_TXF_UMS ||
+ op == SHADER_OPCODE_TXF_CMS ||
+ op == SHADER_OPCODE_TXF_CMS_W) {
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index);
length++;
}
- if (op == SHADER_OPCODE_TXF_CMS) {
+ if (op == SHADER_OPCODE_TXF_CMS || op == SHADER_OPCODE_TXF_CMS_W) {
/* Data from the multisample control surface. */
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs);
length++;
+
+ /* On Gen9+ we'll use ld2dms_w instead which has two registers for
+ * the MCS data.
+ */
+ if (op == SHADER_OPCODE_TXF_CMS_W) {
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD),
+ mcs.file == IMM ?
+ mcs :
+ offset(mcs, bld, 1));
+ length++;
+ }
}
/* There is no offsetting for this message; just copy in the integer
@@ -3912,7 +4042,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
else
mlen = length * reg_width;
- const fs_reg src_payload = fs_reg(GRF, bld.shader->alloc.allocate(mlen),
+ const fs_reg src_payload = fs_reg(VGRF, bld.shader->alloc.allocate(mlen),
BRW_REGISTER_TYPE_F);
bld.LOAD_PAYLOAD(src_payload, sources, length, header_size);
@@ -3942,8 +4072,8 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
const fs_reg &sampler = inst->src[6];
const fs_reg &offset_value = inst->src[7];
assert(inst->src[8].file == IMM && inst->src[9].file == IMM);
- const unsigned coord_components = inst->src[8].fixed_hw_reg.dw1.ud;
- const unsigned grad_components = inst->src[9].fixed_hw_reg.dw1.ud;
+ const unsigned coord_components = inst->src[8].ud;
+ const unsigned grad_components = inst->src[9].ud;
if (devinfo->gen >= 7) {
lower_sampler_logical_send_gen7(bld, inst, op, coordinate,
@@ -4068,6 +4198,10 @@ fs_visitor::lower_logical_sends()
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_CMS);
break;
+ case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
+ lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_CMS_W);
+ break;
+
case SHADER_OPCODE_TXF_UMS_LOGICAL:
lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_UMS);
break;
@@ -4260,6 +4394,21 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
else
return inst->exec_size;
+ case SHADER_OPCODE_TXF_CMS_W_LOGICAL: {
+ /* This opcode can take up to 6 arguments which means that in some
+ * circumstances it can end up with a message that is too long in SIMD16
+ * mode.
+ */
+ const unsigned coord_components = inst->src[8].ud;
+ /* First three arguments are the sample index and the two arguments for
+ * the MCS data.
+ */
+ if ((coord_components + 3) * 2 > MAX_SAMPLER_MESSAGE_SIZE)
+ return 8;
+ else
+ return inst->exec_size;
+ }
+
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
@@ -4473,51 +4622,48 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
}
switch (inst->dst.file) {
- case GRF:
- fprintf(file, "vgrf%d", inst->dst.reg);
- if (alloc.sizes[inst->dst.reg] != inst->regs_written ||
+ case VGRF:
+ fprintf(file, "vgrf%d", inst->dst.nr);
+ if (alloc.sizes[inst->dst.nr] != inst->regs_written ||
inst->dst.subreg_offset)
fprintf(file, "+%d.%d",
inst->dst.reg_offset, inst->dst.subreg_offset);
break;
+ case FIXED_GRF:
+ fprintf(file, "g%d", inst->dst.nr);
+ break;
case MRF:
- fprintf(file, "m%d", inst->dst.reg);
+ fprintf(file, "m%d", inst->dst.nr);
break;
case BAD_FILE:
fprintf(file, "(null)");
break;
case UNIFORM:
- fprintf(file, "***u%d***", inst->dst.reg + inst->dst.reg_offset);
+ fprintf(file, "***u%d***", inst->dst.nr + inst->dst.reg_offset);
break;
case ATTR:
- fprintf(file, "***attr%d***", inst->dst.reg + inst->dst.reg_offset);
+ fprintf(file, "***attr%d***", inst->dst.nr + inst->dst.reg_offset);
break;
- case HW_REG:
- if (inst->dst.fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE) {
- switch (inst->dst.fixed_hw_reg.nr) {
- case BRW_ARF_NULL:
- fprintf(file, "null");
- break;
- case BRW_ARF_ADDRESS:
- fprintf(file, "a0.%d", inst->dst.fixed_hw_reg.subnr);
- break;
- case BRW_ARF_ACCUMULATOR:
- fprintf(file, "acc%d", inst->dst.fixed_hw_reg.subnr);
- break;
- case BRW_ARF_FLAG:
- fprintf(file, "f%d.%d", inst->dst.fixed_hw_reg.nr & 0xf,
- inst->dst.fixed_hw_reg.subnr);
- break;
- default:
- fprintf(file, "arf%d.%d", inst->dst.fixed_hw_reg.nr & 0xf,
- inst->dst.fixed_hw_reg.subnr);
- break;
- }
- } else {
- fprintf(file, "hw_reg%d", inst->dst.fixed_hw_reg.nr);
+ case ARF:
+ switch (inst->dst.nr) {
+ case BRW_ARF_NULL:
+ fprintf(file, "null");
+ break;
+ case BRW_ARF_ADDRESS:
+ fprintf(file, "a0.%d", inst->dst.subnr);
+ break;
+ case BRW_ARF_ACCUMULATOR:
+ fprintf(file, "acc%d", inst->dst.subnr);
+ break;
+ case BRW_ARF_FLAG:
+ fprintf(file, "f%d.%d", inst->dst.nr & 0xf, inst->dst.subnr);
+ break;
+ default:
+ fprintf(file, "arf%d.%d", inst->dst.nr & 0xf, inst->dst.subnr);
+ break;
}
- if (inst->dst.fixed_hw_reg.subnr)
- fprintf(file, "+%d", inst->dst.fixed_hw_reg.subnr);
+ if (inst->dst.subnr)
+ fprintf(file, "+%d", inst->dst.subnr);
break;
case IMM:
unreachable("not reached");
@@ -4530,21 +4676,24 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
if (inst->src[i].abs)
fprintf(file, "|");
switch (inst->src[i].file) {
- case GRF:
- fprintf(file, "vgrf%d", inst->src[i].reg);
- if (alloc.sizes[inst->src[i].reg] != (unsigned)inst->regs_read(i) ||
+ case VGRF:
+ fprintf(file, "vgrf%d", inst->src[i].nr);
+ if (alloc.sizes[inst->src[i].nr] != (unsigned)inst->regs_read(i) ||
inst->src[i].subreg_offset)
fprintf(file, "+%d.%d", inst->src[i].reg_offset,
inst->src[i].subreg_offset);
break;
+ case FIXED_GRF:
+ fprintf(file, "g%d", inst->src[i].nr);
+ break;
case MRF:
- fprintf(file, "***m%d***", inst->src[i].reg);
+ fprintf(file, "***m%d***", inst->src[i].nr);
break;
case ATTR:
- fprintf(file, "attr%d+%d", inst->src[i].reg, inst->src[i].reg_offset);
+ fprintf(file, "attr%d+%d", inst->src[i].nr, inst->src[i].reg_offset);
break;
case UNIFORM:
- fprintf(file, "u%d", inst->src[i].reg + inst->src[i].reg_offset);
+ fprintf(file, "u%d", inst->src[i].nr + inst->src[i].reg_offset);
if (inst->src[i].reladdr) {
fprintf(file, "+reladdr");
} else if (inst->src[i].subreg_offset) {
@@ -4558,60 +4707,48 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
case IMM:
switch (inst->src[i].type) {
case BRW_REGISTER_TYPE_F:
- fprintf(file, "%ff", inst->src[i].fixed_hw_reg.dw1.f);
+ fprintf(file, "%ff", inst->src[i].f);
break;
case BRW_REGISTER_TYPE_W:
case BRW_REGISTER_TYPE_D:
- fprintf(file, "%dd", inst->src[i].fixed_hw_reg.dw1.d);
+ fprintf(file, "%dd", inst->src[i].d);
break;
case BRW_REGISTER_TYPE_UW:
case BRW_REGISTER_TYPE_UD:
- fprintf(file, "%uu", inst->src[i].fixed_hw_reg.dw1.ud);
+ fprintf(file, "%uu", inst->src[i].ud);
break;
case BRW_REGISTER_TYPE_VF:
fprintf(file, "[%-gF, %-gF, %-gF, %-gF]",
- brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 0) & 0xff),
- brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 8) & 0xff),
- brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 16) & 0xff),
- brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 24) & 0xff));
+ brw_vf_to_float((inst->src[i].ud >> 0) & 0xff),
+ brw_vf_to_float((inst->src[i].ud >> 8) & 0xff),
+ brw_vf_to_float((inst->src[i].ud >> 16) & 0xff),
+ brw_vf_to_float((inst->src[i].ud >> 24) & 0xff));
break;
default:
fprintf(file, "???");
break;
}
break;
- case HW_REG:
- if (inst->src[i].fixed_hw_reg.negate)
- fprintf(file, "-");
- if (inst->src[i].fixed_hw_reg.abs)
- fprintf(file, "|");
- if (inst->src[i].fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE) {
- switch (inst->src[i].fixed_hw_reg.nr) {
- case BRW_ARF_NULL:
- fprintf(file, "null");
- break;
- case BRW_ARF_ADDRESS:
- fprintf(file, "a0.%d", inst->src[i].fixed_hw_reg.subnr);
- break;
- case BRW_ARF_ACCUMULATOR:
- fprintf(file, "acc%d", inst->src[i].fixed_hw_reg.subnr);
- break;
- case BRW_ARF_FLAG:
- fprintf(file, "f%d.%d", inst->src[i].fixed_hw_reg.nr & 0xf,
- inst->src[i].fixed_hw_reg.subnr);
- break;
- default:
- fprintf(file, "arf%d.%d", inst->src[i].fixed_hw_reg.nr & 0xf,
- inst->src[i].fixed_hw_reg.subnr);
- break;
- }
- } else {
- fprintf(file, "hw_reg%d", inst->src[i].fixed_hw_reg.nr);
+ case ARF:
+ switch (inst->src[i].nr) {
+ case BRW_ARF_NULL:
+ fprintf(file, "null");
+ break;
+ case BRW_ARF_ADDRESS:
+ fprintf(file, "a0.%d", inst->src[i].subnr);
+ break;
+ case BRW_ARF_ACCUMULATOR:
+ fprintf(file, "acc%d", inst->src[i].subnr);
+ break;
+ case BRW_ARF_FLAG:
+ fprintf(file, "f%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr);
+ break;
+ default:
+ fprintf(file, "arf%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr);
+ break;
}
- if (inst->src[i].fixed_hw_reg.subnr)
- fprintf(file, "+%d", inst->src[i].fixed_hw_reg.subnr);
- if (inst->src[i].fixed_hw_reg.abs)
- fprintf(file, "|");
+ if (inst->src[i].subnr)
+ fprintf(file, "+%d", inst->src[i].subnr);
break;
}
if (inst->src[i].abs)
@@ -4627,6 +4764,9 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
fprintf(file, " ");
+ if (inst->force_writemask_all)
+ fprintf(file, "NoMask ");
+
if (dispatch_width == 16 && inst->exec_size == 8) {
if (inst->force_sechalf)
fprintf(file, "2ndhalf ");
@@ -4779,6 +4919,45 @@ fs_visitor::setup_vs_payload()
*
*/
void
+fs_visitor::setup_gs_payload()
+{
+ assert(stage == MESA_SHADER_GEOMETRY);
+
+ struct brw_gs_prog_data *gs_prog_data =
+ (struct brw_gs_prog_data *) prog_data;
+ struct brw_vue_prog_data *vue_prog_data =
+ (struct brw_vue_prog_data *) prog_data;
+
+ /* R0: thread header, R1: output URB handles */
+ payload.num_regs = 2;
+
+ if (gs_prog_data->include_primitive_id) {
+ /* R2: Primitive ID 0..7 */
+ payload.num_regs++;
+ }
+
+ /* Use a maximum of 32 registers for push-model inputs. */
+ const unsigned max_push_components = 32;
+
+ /* If pushing our inputs would take too many registers, reduce the URB read
+ * length (which is in HWords, or 8 registers), and resort to pulling.
+ *
+ * Note that the GS reads <URB Read Length> HWords for every vertex - so we
+ * have to multiply by VerticesIn to obtain the total storage requirement.
+ */
+ if (8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in >
+ max_push_components) {
+ gs_prog_data->base.include_vue_handles = true;
+
+ /* R3..RN: ICP Handles for each incoming vertex (when using pull model) */
+ payload.num_regs += nir->info.gs.vertices_in;
+
+ vue_prog_data->urb_read_length =
+ ROUND_DOWN_TO(max_push_components / nir->info.gs.vertices_in, 8) / 8;
+ }
+}
+
+void
fs_visitor::setup_cs_payload()
{
assert(devinfo->gen >= 7);
@@ -4925,7 +5104,7 @@ fs_visitor::fixup_3src_null_dest()
{
foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
if (inst->is_3src() && inst->dst.is_null()) {
- inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
+ inst->dst = fs_reg(VGRF, alloc.allocate(dispatch_width / 8),
inst->dst.type);
}
}
@@ -5035,6 +5214,55 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes)
}
bool
+fs_visitor::run_gs()
+{
+ assert(stage == MESA_SHADER_GEOMETRY);
+
+ setup_gs_payload();
+
+ this->final_gs_vertex_count = vgrf(glsl_type::uint_type);
+
+ if (gs_compile->control_data_header_size_bits > 0) {
+ /* Create a VGRF to store accumulated control data bits. */
+ this->control_data_bits = vgrf(glsl_type::uint_type);
+
+ /* If we're outputting more than 32 control data bits, then EmitVertex()
+ * will set control_data_bits to 0 after emitting the first vertex.
+ * Otherwise, we need to initialize it to 0 here.
+ */
+ if (gs_compile->control_data_header_size_bits <= 32) {
+ const fs_builder abld = bld.annotate("initialize control data bits");
+ abld.MOV(this->control_data_bits, fs_reg(0u));
+ }
+ }
+
+ if (shader_time_index >= 0)
+ emit_shader_time_begin();
+
+ emit_nir_code();
+
+ emit_gs_thread_end();
+
+ if (shader_time_index >= 0)
+ emit_shader_time_end();
+
+ if (failed)
+ return false;
+
+ calculate_cfg();
+
+ optimize();
+
+ assign_curb_setup();
+ assign_gs_urb_setup();
+
+ fixup_3src_null_dest();
+ allocate_registers();
+
+ return !failed;
+}
+
+bool
fs_visitor::run_fs(bool do_rep_send)
{
brw_wm_prog_data *wm_prog_data = (brw_wm_prog_data *) this->prog_data;