diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs.cpp')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 784 |
1 files changed, 506 insertions, 278 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index e218a85..3bec728 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -43,6 +43,7 @@ #include "brw_wm.h" #include "brw_fs.h" #include "brw_cs.h" +#include "brw_vec4_gs_visitor.h" #include "brw_cfg.h" #include "brw_dead_control_flow.h" #include "main/uniforms.h" @@ -75,8 +76,9 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, /* This will be the case for almost all instructions. */ switch (dst.file) { - case GRF: - case HW_REG: + case VGRF: + case ARF: + case FIXED_GRF: case MRF: case ATTR: this->regs_written = DIV_ROUND_UP(dst.component_size(exec_size), @@ -203,7 +205,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD; int regs_written = 4 * (bld.dispatch_width() / 8) * scale; - fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written), dst.type); + fs_reg vec4_result = fs_reg(VGRF, alloc.allocate(regs_written), dst.type); fs_inst *inst = bld.emit(op, vec4_result, surf_index, vec4_offset); inst->regs_written = regs_written; @@ -232,7 +234,7 @@ fs_visitor::DEP_RESOLVE_MOV(const fs_builder &bld, int grf) const fs_builder ubld = bld.annotate("send dependency resolve") .half(0); - ubld.MOV(ubld.null_reg_f(), fs_reg(GRF, grf, BRW_REGISTER_TYPE_F)); + ubld.MOV(ubld.null_reg_f(), fs_reg(VGRF, grf, BRW_REGISTER_TYPE_F)); } bool @@ -283,14 +285,15 @@ fs_inst::is_send_from_grf() const case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: case SHADER_OPCODE_URB_READ_SIMD8: + case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: return true; case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: - return src[1].file == GRF; + return src[1].file == VGRF; case FS_OPCODE_FB_WRITE: - return src[0].file == GRF; + return src[0].file == VGRF; default: if (is_tex()) - return src[0].file == GRF; + return src[0].file == VGRF; return false; } @@ -303,10 +306,10 @@ fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const return false; fs_reg reg = this->src[0]; - if (reg.file != GRF || reg.reg_offset != 0 || reg.stride == 0) + if (reg.file != VGRF || reg.reg_offset != 0 || reg.stride == 0) return false; - if (grf_alloc.sizes[reg.reg] != this->regs_written) + if (grf_alloc.sizes[reg.nr] != this->regs_written) return false; for (int i = 0; i < this->sources; i++) { @@ -378,7 +381,7 @@ fs_reg::fs_reg(float f) this->file = IMM; this->type = BRW_REGISTER_TYPE_F; this->stride = 0; - this->fixed_hw_reg.dw1.f = f; + this->f = f; } /** Immediate value constructor. */ @@ -388,7 +391,7 @@ fs_reg::fs_reg(int32_t i) this->file = IMM; this->type = BRW_REGISTER_TYPE_D; this->stride = 0; - this->fixed_hw_reg.dw1.d = i; + this->d = i; } /** Immediate value constructor. */ @@ -398,7 +401,7 @@ fs_reg::fs_reg(uint32_t u) this->file = IMM; this->type = BRW_REGISTER_TYPE_UD; this->stride = 0; - this->fixed_hw_reg.dw1.ud = u; + this->ud = u; } /** Vector float immediate value constructor. */ @@ -407,7 +410,7 @@ fs_reg::fs_reg(uint8_t vf[4]) init(); this->file = IMM; this->type = BRW_REGISTER_TYPE_VF; - memcpy(&this->fixed_hw_reg.dw1.ud, vf, sizeof(unsigned)); + memcpy(&this->ud, vf, sizeof(unsigned)); } /** Vector float immediate value constructor. */ @@ -416,42 +419,38 @@ fs_reg::fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3) init(); this->file = IMM; this->type = BRW_REGISTER_TYPE_VF; - this->fixed_hw_reg.dw1.ud = (vf0 << 0) | - (vf1 << 8) | - (vf2 << 16) | - (vf3 << 24); + this->ud = (vf0 << 0) | (vf1 << 8) | (vf2 << 16) | (vf3 << 24); } -/** Fixed brw_reg. */ -fs_reg::fs_reg(struct brw_reg fixed_hw_reg) +fs_reg::fs_reg(struct brw_reg reg) : + backend_reg(reg) { - init(); - this->file = HW_REG; - this->fixed_hw_reg = fixed_hw_reg; - this->type = fixed_hw_reg.type; + this->reg_offset = 0; + this->subreg_offset = 0; + this->reladdr = NULL; + this->stride = 1; + if (this->file == IMM && + (this->type != BRW_REGISTER_TYPE_V && + this->type != BRW_REGISTER_TYPE_UV && + this->type != BRW_REGISTER_TYPE_VF)) { + this->stride = 0; + } } bool fs_reg::equals(const fs_reg &r) const { - return (file == r.file && - reg == r.reg && + return (memcmp((brw_reg *)this, (brw_reg *)&r, sizeof(brw_reg)) == 0 && reg_offset == r.reg_offset && subreg_offset == r.subreg_offset && - type == r.type && - negate == r.negate && - abs == r.abs && !reladdr && !r.reladdr && - ((file != HW_REG && file != IMM) || - memcmp(&fixed_hw_reg, &r.fixed_hw_reg, - sizeof(fixed_hw_reg)) == 0) && stride == r.stride); } fs_reg & fs_reg::set_smear(unsigned subreg) { - assert(file != HW_REG && file != IMM); + assert(file != ARF && file != FIXED_GRF && file != IMM); subreg_offset = subreg * type_sz(type); stride = 0; return *this; @@ -466,9 +465,9 @@ fs_reg::is_contiguous() const unsigned fs_reg::component_size(unsigned width) const { - const unsigned stride = (file != HW_REG ? this->stride : - fixed_hw_reg.hstride == 0 ? 0 : - 1 << (fixed_hw_reg.hstride - 1)); + const unsigned stride = ((file != ARF && file != FIXED_GRF) ? this->stride : + hstride == 0 ? 0 : + 1 << (hstride - 1)); return MAX2(width * stride, 1) * type_sz(type); } @@ -514,6 +513,19 @@ type_size_scalar(const struct glsl_type *type) } /** + * Returns the number of scalar components needed to store type, assuming + * that vectors are padded out to vec4. + * + * This has the packing rules of type_size_vec4(), but counts components + * similar to type_size_scalar(). + */ +extern "C" int +type_size_vec4_times_4(const struct glsl_type *type) +{ + return 4 * type_size_vec4(type); +} + +/** * Create a MOV to read the timestamp register. * * The caller is responsible for emitting the MOV. The return value is @@ -529,7 +541,7 @@ fs_visitor::get_timestamp(const fs_builder &bld) 0), BRW_REGISTER_TYPE_UD)); - fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); + fs_reg dst = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); /* We want to read the 3 fields we care about even if it's not enabled in * the dispatch. @@ -584,7 +596,7 @@ fs_visitor::emit_shader_time_end() fs_reg start = shader_start_time; start.negate = true; - fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); + fs_reg diff = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); diff.set_smear(0); const fs_builder cbld = ibld.group(1, 0); @@ -706,7 +718,7 @@ fs_inst::components_read(unsigned i) const assert(src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM); /* First/second FB write color. */ if (i < 2) - return src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.dw1.ud; + return src[FB_WRITE_LOGICAL_SRC_COMPONENTS].ud; else return 1; @@ -717,6 +729,7 @@ fs_inst::components_read(unsigned i) const case SHADER_OPCODE_TXS_LOGICAL: case FS_OPCODE_TXB_LOGICAL: case SHADER_OPCODE_TXF_CMS_LOGICAL: + case SHADER_OPCODE_TXF_CMS_W_LOGICAL: case SHADER_OPCODE_TXF_UMS_LOGICAL: case SHADER_OPCODE_TXF_MCS_LOGICAL: case SHADER_OPCODE_LOD_LOGICAL: @@ -725,13 +738,16 @@ fs_inst::components_read(unsigned i) const assert(src[8].file == IMM && src[9].file == IMM); /* Texture coordinates. */ if (i == 0) - return src[8].fixed_hw_reg.dw1.ud; + return src[8].ud; /* Texture derivatives. */ else if ((i == 2 || i == 3) && opcode == SHADER_OPCODE_TXD_LOGICAL) - return src[9].fixed_hw_reg.dw1.ud; + return src[9].ud; /* Texture offset. */ else if (i == 7) return 2; + /* MCS */ + else if (i == 5 && opcode == SHADER_OPCODE_TXF_CMS_W_LOGICAL) + return 2; else return 1; @@ -740,7 +756,7 @@ fs_inst::components_read(unsigned i) const assert(src[3].file == IMM); /* Surface coordinates. */ if (i == 0) - return src[3].fixed_hw_reg.dw1.ud; + return src[3].ud; /* Surface operation source (ignored for reads). */ else if (i == 1) return 0; @@ -753,10 +769,10 @@ fs_inst::components_read(unsigned i) const src[4].file == IMM); /* Surface coordinates. */ if (i == 0) - return src[3].fixed_hw_reg.dw1.ud; + return src[3].ud; /* Surface operation source. */ else if (i == 1) - return src[4].fixed_hw_reg.dw1.ud; + return src[4].ud; else return 1; @@ -764,10 +780,10 @@ fs_inst::components_read(unsigned i) const case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: { assert(src[3].file == IMM && src[4].file == IMM); - const unsigned op = src[4].fixed_hw_reg.dw1.ud; + const unsigned op = src[4].ud; /* Surface coordinates. */ if (i == 0) - return src[3].fixed_hw_reg.dw1.ud; + return src[3].ud; /* Surface operation source. */ else if (i == 1 && op == BRW_AOP_CMPWR) return 2; @@ -793,6 +809,7 @@ fs_inst::regs_read(int arg) const case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: case SHADER_OPCODE_URB_READ_SIMD8: + case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: case SHADER_OPCODE_UNTYPED_ATOMIC: case SHADER_OPCODE_UNTYPED_SURFACE_READ: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: @@ -825,7 +842,7 @@ fs_inst::regs_read(int arg) const return 1; default: - if (is_tex() && arg == 0 && src[0].file == GRF) + if (is_tex() && arg == 0 && src[0].file == VGRF) return mlen; break; } @@ -836,9 +853,10 @@ fs_inst::regs_read(int arg) const case UNIFORM: case IMM: return 1; - case GRF: + case ARF: + case FIXED_GRF: + case VGRF: case ATTR: - case HW_REG: return DIV_ROUND_UP(components_read(arg) * src[arg].component_size(exec_size), REG_SIZE); @@ -896,6 +914,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) case SHADER_OPCODE_TXD: case SHADER_OPCODE_TXF: case SHADER_OPCODE_TXF_CMS: + case SHADER_OPCODE_TXF_CMS_W: case SHADER_OPCODE_TXF_MCS: case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: @@ -938,26 +957,24 @@ fs_reg fs_visitor::vgrf(const glsl_type *const type) { int reg_width = dispatch_width / 8; - return fs_reg(GRF, alloc.allocate(type_size_scalar(type) * reg_width), + return fs_reg(VGRF, alloc.allocate(type_size_scalar(type) * reg_width), brw_type_for_base_type(type)); } -/** Fixed HW reg constructor. */ -fs_reg::fs_reg(enum register_file file, int reg) +fs_reg::fs_reg(enum brw_reg_file file, int nr) { init(); this->file = file; - this->reg = reg; + this->nr = nr; this->type = BRW_REGISTER_TYPE_F; this->stride = (file == UNIFORM ? 0 : 1); } -/** Fixed HW reg constructor. */ -fs_reg::fs_reg(enum register_file file, int reg, enum brw_reg_type type) +fs_reg::fs_reg(enum brw_reg_file file, int nr, enum brw_reg_type type) { init(); this->file = file; - this->reg = reg; + this->nr = nr; this->type = type; this->stride = (file == UNIFORM ? 0 : 1); } @@ -1285,9 +1302,9 @@ fs_visitor::emit_sampleid_setup() fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::int_type)); if (key->compute_sample_id) { - fs_reg t1(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_D); + fs_reg t1(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_D); t1.set_smear(0); - fs_reg t2(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_W); + fs_reg t2(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_W); /* The PS will be run in MSDISPMODE_PERSAMPLE. For example with * 8x multisampling, subspan 0 will represent sample N (where N @@ -1308,9 +1325,15 @@ fs_visitor::emit_sampleid_setup() * are sample 1 of subspan 0; the third group is sample 0 of * subspan 1, and finally sample 1 of subspan 1. */ + + /* SKL+ has an extra bit for the Starting Sample Pair Index to + * accomodate 16x MSAA. + */ + unsigned sspi_mask = devinfo->gen >= 9 ? 0x1c0 : 0xc0; + abld.exec_all().group(1, 0) .AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)), - fs_reg(0xc0)); + fs_reg(sspi_mask)); abld.exec_all().group(1, 0).SHR(t1, t1, fs_reg(5)); /* This works for both SIMD8 and SIMD16 */ @@ -1362,6 +1385,57 @@ fs_visitor::emit_discard_jump() } void +fs_visitor::emit_gs_thread_end() +{ + assert(stage == MESA_SHADER_GEOMETRY); + + struct brw_gs_prog_data *gs_prog_data = + (struct brw_gs_prog_data *) prog_data; + + if (gs_compile->control_data_header_size_bits > 0) { + emit_gs_control_data_bits(this->final_gs_vertex_count); + } + + const fs_builder abld = bld.annotate("thread end"); + fs_inst *inst; + + if (gs_prog_data->static_vertex_count != -1) { + foreach_in_list_reverse(fs_inst, prev, &this->instructions) { + if (prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8 || + prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED || + prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT || + prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT) { + prev->eot = true; + + /* Delete now dead instructions. */ + foreach_in_list_reverse_safe(exec_node, dead, &this->instructions) { + if (dead == prev) + break; + dead->remove(); + } + return; + } else if (prev->is_control_flow() || prev->has_side_effects()) { + break; + } + } + fs_reg hdr = abld.vgrf(BRW_REGISTER_TYPE_UD, 1); + abld.MOV(hdr, fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD))); + inst = abld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, hdr); + inst->mlen = 1; + } else { + fs_reg payload = abld.vgrf(BRW_REGISTER_TYPE_UD, 2); + fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 2); + sources[0] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)); + sources[1] = this->final_gs_vertex_count; + abld.LOAD_PAYLOAD(payload, sources, 2, 2); + inst = abld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload); + inst->mlen = 2; + } + inst->eot = true; + inst->offset = 0; +} + +void fs_visitor::assign_curb_setup() { if (dispatch_width == 8) { @@ -1384,7 +1458,7 @@ fs_visitor::assign_curb_setup() foreach_block_and_inst(block, fs_inst, inst, cfg) { for (unsigned int i = 0; i < inst->sources; i++) { if (inst->src[i].file == UNIFORM) { - int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset; + int uniform_nr = inst->src[i].nr + inst->src[i].reg_offset; int constant_nr; if (uniform_nr >= 0 && uniform_nr < (int) uniforms) { constant_nr = push_constant_loc[uniform_nr]; @@ -1400,10 +1474,11 @@ fs_visitor::assign_curb_setup() struct brw_reg brw_reg = brw_vec1_grf(payload.num_regs + constant_nr / 8, constant_nr % 8); + brw_reg.abs = inst->src[i].abs; + brw_reg.negate = inst->src[i].negate; assert(inst->src[i].stride == 0); - inst->src[i].file = HW_REG; - inst->src[i].fixed_hw_reg = byte_offset( + inst->src[i] = byte_offset( retype(brw_reg, inst->src[i].type), inst->src[i].subreg_offset); } @@ -1518,13 +1593,13 @@ fs_visitor::assign_urb_setup() */ foreach_block_and_inst(block, fs_inst, inst, cfg) { if (inst->opcode == FS_OPCODE_LINTERP) { - assert(inst->src[1].file == HW_REG); - inst->src[1].fixed_hw_reg.nr += urb_start; + assert(inst->src[1].file == FIXED_GRF); + inst->src[1].nr += urb_start; } if (inst->opcode == FS_OPCODE_CINTERP) { - assert(inst->src[0].file == HW_REG); - inst->src[0].fixed_hw_reg.nr += urb_start; + assert(inst->src[0].file == FIXED_GRF); + inst->src[0].nr += urb_start; } } @@ -1533,6 +1608,30 @@ fs_visitor::assign_urb_setup() } void +fs_visitor::convert_attr_sources_to_hw_regs(fs_inst *inst) +{ + for (int i = 0; i < inst->sources; i++) { + if (inst->src[i].file == ATTR) { + int grf = payload.num_regs + + prog_data->curb_read_length + + inst->src[i].nr + + inst->src[i].reg_offset; + + unsigned width = inst->src[i].stride == 0 ? 1 : inst->exec_size; + struct brw_reg reg = + stride(byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type), + inst->src[i].subreg_offset), + inst->exec_size * inst->src[i].stride, + width, inst->src[i].stride); + reg.abs = inst->src[i].abs; + reg.negate = inst->src[i].negate; + + inst->src[i] = reg; + } + } +} + +void fs_visitor::assign_vs_urb_setup() { brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data; @@ -1549,24 +1648,44 @@ fs_visitor::assign_vs_urb_setup() /* Rewrite all ATTR file references to the hw grf that they land in. */ foreach_block_and_inst(block, fs_inst, inst, cfg) { - for (int i = 0; i < inst->sources; i++) { - if (inst->src[i].file == ATTR) { - int grf = payload.num_regs + - prog_data->curb_read_length + - inst->src[i].reg + - inst->src[i].reg_offset; - - inst->src[i].file = HW_REG; - inst->src[i].fixed_hw_reg = - stride(byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type), - inst->src[i].subreg_offset), - inst->exec_size * inst->src[i].stride, - inst->exec_size, inst->src[i].stride); - } + convert_attr_sources_to_hw_regs(inst); + } +} + +void +fs_visitor::assign_gs_urb_setup() +{ + assert(stage == MESA_SHADER_GEOMETRY); + + brw_vue_prog_data *vue_prog_data = (brw_vue_prog_data *) prog_data; + + first_non_payload_grf += + 8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in; + + const unsigned first_icp_handle = payload.num_regs - + (vue_prog_data->include_vue_handles ? nir->info.gs.vertices_in : 0); + + foreach_block_and_inst(block, fs_inst, inst, cfg) { + /* Lower URB_READ_SIMD8 opcodes into real messages. */ + if (inst->opcode == SHADER_OPCODE_URB_READ_SIMD8) { + assert(inst->src[0].file == IMM); + inst->src[0] = retype(brw_vec8_grf(first_icp_handle + + inst->src[0].ud, + 0), BRW_REGISTER_TYPE_UD); + /* for now, assume constant - we can do per-slot offsets later */ + assert(inst->src[1].file == IMM); + inst->offset = inst->src[1].ud; + inst->src[1] = fs_reg(); + inst->mlen = 1; + inst->base_mrf = -1; } + + /* Rewrite all ATTR file references to GRFs. */ + convert_attr_sources_to_hw_regs(inst); } } + /** * Split large virtual GRFs into separate components if we can. * @@ -1609,30 +1728,30 @@ fs_visitor::split_virtual_grfs() /* Mark all used registers as fully splittable */ foreach_block_and_inst(block, fs_inst, inst, cfg) { - if (inst->dst.file == GRF) { - int reg = vgrf_to_reg[inst->dst.reg]; - for (unsigned j = 1; j < this->alloc.sizes[inst->dst.reg]; j++) + if (inst->dst.file == VGRF) { + int reg = vgrf_to_reg[inst->dst.nr]; + for (unsigned j = 1; j < this->alloc.sizes[inst->dst.nr]; j++) split_points[reg + j] = true; } for (int i = 0; i < inst->sources; i++) { - if (inst->src[i].file == GRF) { - int reg = vgrf_to_reg[inst->src[i].reg]; - for (unsigned j = 1; j < this->alloc.sizes[inst->src[i].reg]; j++) + if (inst->src[i].file == VGRF) { + int reg = vgrf_to_reg[inst->src[i].nr]; + for (unsigned j = 1; j < this->alloc.sizes[inst->src[i].nr]; j++) split_points[reg + j] = true; } } } foreach_block_and_inst(block, fs_inst, inst, cfg) { - if (inst->dst.file == GRF) { - int reg = vgrf_to_reg[inst->dst.reg] + inst->dst.reg_offset; + if (inst->dst.file == VGRF) { + int reg = vgrf_to_reg[inst->dst.nr] + inst->dst.reg_offset; for (int j = 1; j < inst->regs_written; j++) split_points[reg + j] = false; } for (int i = 0; i < inst->sources; i++) { - if (inst->src[i].file == GRF) { - int reg = vgrf_to_reg[inst->src[i].reg] + inst->src[i].reg_offset; + if (inst->src[i].file == VGRF) { + int reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].reg_offset; for (int j = 1; j < inst->regs_read(i); j++) split_points[reg + j] = false; } @@ -1678,16 +1797,16 @@ fs_visitor::split_virtual_grfs() assert(reg == reg_count); foreach_block_and_inst(block, fs_inst, inst, cfg) { - if (inst->dst.file == GRF) { - reg = vgrf_to_reg[inst->dst.reg] + inst->dst.reg_offset; - inst->dst.reg = new_virtual_grf[reg]; + if (inst->dst.file == VGRF) { + reg = vgrf_to_reg[inst->dst.nr] + inst->dst.reg_offset; + inst->dst.nr = new_virtual_grf[reg]; inst->dst.reg_offset = new_reg_offset[reg]; assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]); } for (int i = 0; i < inst->sources; i++) { - if (inst->src[i].file == GRF) { - reg = vgrf_to_reg[inst->src[i].reg] + inst->src[i].reg_offset; - inst->src[i].reg = new_virtual_grf[reg]; + if (inst->src[i].file == VGRF) { + reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].reg_offset; + inst->src[i].nr = new_virtual_grf[reg]; inst->src[i].reg_offset = new_reg_offset[reg]; assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]); } @@ -1714,12 +1833,12 @@ fs_visitor::compact_virtual_grfs() /* Mark which virtual GRFs are used. */ foreach_block_and_inst(block, const fs_inst, inst, cfg) { - if (inst->dst.file == GRF) - remap_table[inst->dst.reg] = 0; + if (inst->dst.file == VGRF) + remap_table[inst->dst.nr] = 0; for (int i = 0; i < inst->sources; i++) { - if (inst->src[i].file == GRF) - remap_table[inst->src[i].reg] = 0; + if (inst->src[i].file == VGRF) + remap_table[inst->src[i].nr] = 0; } } @@ -1743,12 +1862,12 @@ fs_visitor::compact_virtual_grfs() /* Patch all the instructions to use the newly renumbered registers */ foreach_block_and_inst(block, fs_inst, inst, cfg) { - if (inst->dst.file == GRF) - inst->dst.reg = remap_table[inst->dst.reg]; + if (inst->dst.file == VGRF) + inst->dst.nr = remap_table[inst->dst.nr]; for (int i = 0; i < inst->sources; i++) { - if (inst->src[i].file == GRF) - inst->src[i].reg = remap_table[inst->src[i].reg]; + if (inst->src[i].file == VGRF) + inst->src[i].nr = remap_table[inst->src[i].nr]; } } @@ -1757,9 +1876,9 @@ fs_visitor::compact_virtual_grfs() * think some random VGRF is delta_xy. */ for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) { - if (delta_xy[i].file == GRF) { - if (remap_table[delta_xy[i].reg] != -1) { - delta_xy[i].reg = remap_table[delta_xy[i].reg]; + if (delta_xy[i].file == VGRF) { + if (remap_table[delta_xy[i].nr] != -1) { + delta_xy[i].nr = remap_table[delta_xy[i].nr]; } else { delta_xy[i].file = BAD_FILE; } @@ -1811,7 +1930,7 @@ fs_visitor::assign_constant_locations() continue; if (inst->src[i].reladdr) { - int uniform = inst->src[i].reg; + int uniform = inst->src[i].nr; /* If this array isn't already present in the pull constant buffer, * add it. @@ -1823,7 +1942,7 @@ fs_visitor::assign_constant_locations() } } else { /* Mark the the one accessed uniform as live */ - int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; + int constant_nr = inst->src[i].nr + inst->src[i].reg_offset; if (constant_nr >= 0 && constant_nr < (int) uniforms) is_live[constant_nr] = true; } @@ -1899,7 +2018,7 @@ fs_visitor::demote_pull_constants() continue; int pull_index; - unsigned location = inst->src[i].reg + inst->src[i].reg_offset; + unsigned location = inst->src[i].nr + inst->src[i].reg_offset; if (location >= uniforms) /* Out of bounds access */ pull_index = -1; else @@ -1910,7 +2029,7 @@ fs_visitor::demote_pull_constants() /* Set up the annotation tracking for new generated instructions. */ const fs_builder ibld(this, block, inst); - fs_reg surf_index(stage_prog_data->binding_table.pull_constants_start); + const unsigned index = stage_prog_data->binding_table.pull_constants_start; fs_reg dst = vgrf(glsl_type::float_type); assert(inst->src[i].stride == 0); @@ -1918,7 +2037,7 @@ fs_visitor::demote_pull_constants() /* Generate a pull load into dst. */ if (inst->src[i].reladdr) { VARYING_PULL_CONSTANT_LOAD(ibld, dst, - surf_index, + fs_reg(index), *inst->src[i].reladdr, pull_index); inst->src[i].reladdr = NULL; @@ -1927,13 +2046,14 @@ fs_visitor::demote_pull_constants() const fs_builder ubld = ibld.exec_all().group(8, 0); fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15); ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, - dst, surf_index, offset); + dst, fs_reg(index), offset); inst->src[i].set_smear(pull_index & 3); } + brw_mark_surface_used(prog_data, index); /* Rewrite the instruction to use the temporary VGRF. */ - inst->src[i].file = GRF; - inst->src[i].reg = dst.reg; + inst->src[i].file = VGRF; + inst->src[i].nr = dst.nr; inst->src[i].reg_offset = 0; } } @@ -1955,8 +2075,7 @@ fs_visitor::opt_algebraic() if (inst->dst.type != inst->src[0].type) assert(!"unimplemented: saturate mixed types"); - if (brw_saturate_immediate(inst->dst.type, - &inst->src[0].fixed_hw_reg)) { + if (brw_saturate_immediate(inst->dst.type, &inst->src[0])) { inst->saturate = false; progress = true; } @@ -1996,7 +2115,7 @@ fs_visitor::opt_algebraic() if (inst->src[0].file == IMM) { assert(inst->src[0].type == BRW_REGISTER_TYPE_F); inst->opcode = BRW_OPCODE_MOV; - inst->src[0].fixed_hw_reg.dw1.f *= inst->src[1].fixed_hw_reg.dw1.f; + inst->src[0].f *= inst->src[1].f; inst->src[1] = reg_undef; progress = true; break; @@ -2017,7 +2136,7 @@ fs_visitor::opt_algebraic() if (inst->src[0].file == IMM) { assert(inst->src[0].type == BRW_REGISTER_TYPE_F); inst->opcode = BRW_OPCODE_MOV; - inst->src[0].fixed_hw_reg.dw1.f += inst->src[1].fixed_hw_reg.dw1.f; + inst->src[0].f += inst->src[1].f; inst->src[1] = reg_undef; progress = true; break; @@ -2066,7 +2185,7 @@ fs_visitor::opt_algebraic() case BRW_CONDITIONAL_L: switch (inst->src[1].type) { case BRW_REGISTER_TYPE_F: - if (inst->src[1].fixed_hw_reg.dw1.f >= 1.0f) { + if (inst->src[1].f >= 1.0f) { inst->opcode = BRW_OPCODE_MOV; inst->src[1] = reg_undef; inst->conditional_mod = BRW_CONDITIONAL_NONE; @@ -2081,7 +2200,7 @@ fs_visitor::opt_algebraic() case BRW_CONDITIONAL_G: switch (inst->src[1].type) { case BRW_REGISTER_TYPE_F: - if (inst->src[1].fixed_hw_reg.dw1.f <= 0.0f) { + if (inst->src[1].f <= 0.0f) { inst->opcode = BRW_OPCODE_MOV; inst->src[1] = reg_undef; inst->conditional_mod = BRW_CONDITIONAL_NONE; @@ -2118,7 +2237,7 @@ fs_visitor::opt_algebraic() progress = true; } else if (inst->src[1].file == IMM && inst->src[2].file == IMM) { inst->opcode = BRW_OPCODE_ADD; - inst->src[1].fixed_hw_reg.dw1.f *= inst->src[2].fixed_hw_reg.dw1.f; + inst->src[1].f *= inst->src[2].f; inst->src[2] = reg_undef; progress = true; } @@ -2143,7 +2262,7 @@ fs_visitor::opt_algebraic() } else if (inst->src[1].file == IMM) { inst->opcode = BRW_OPCODE_MOV; inst->src[0] = component(inst->src[0], - inst->src[1].fixed_hw_reg.dw1.ud); + inst->src[1].ud); inst->sources = 1; inst->force_writemask_all = true; progress = true; @@ -2344,31 +2463,31 @@ fs_visitor::opt_register_renaming() /* Rewrite instruction sources. */ for (int i = 0; i < inst->sources; i++) { - if (inst->src[i].file == GRF && - remap[inst->src[i].reg] != -1 && - remap[inst->src[i].reg] != inst->src[i].reg) { - inst->src[i].reg = remap[inst->src[i].reg]; + if (inst->src[i].file == VGRF && + remap[inst->src[i].nr] != -1 && + remap[inst->src[i].nr] != inst->src[i].nr) { + inst->src[i].nr = remap[inst->src[i].nr]; progress = true; } } - const int dst = inst->dst.reg; + const int dst = inst->dst.nr; if (depth == 0 && - inst->dst.file == GRF && - alloc.sizes[inst->dst.reg] == inst->exec_size / 8 && + inst->dst.file == VGRF && + alloc.sizes[inst->dst.nr] == inst->exec_size / 8 && !inst->is_partial_write()) { if (remap[dst] == -1) { remap[dst] = dst; } else { remap[dst] = alloc.allocate(inst->exec_size / 8); - inst->dst.reg = remap[dst]; + inst->dst.nr = remap[dst]; progress = true; } - } else if (inst->dst.file == GRF && + } else if (inst->dst.file == VGRF && remap[dst] != -1 && remap[dst] != dst) { - inst->dst.reg = remap[dst]; + inst->dst.nr = remap[dst]; progress = true; } } @@ -2377,8 +2496,8 @@ fs_visitor::opt_register_renaming() invalidate_live_intervals(); for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) { - if (delta_xy[i].file == GRF && remap[delta_xy[i].reg] != -1) { - delta_xy[i].reg = remap[delta_xy[i].reg]; + if (delta_xy[i].file == VGRF && remap[delta_xy[i].nr] != -1) { + delta_xy[i].nr = remap[delta_xy[i].nr]; } } } @@ -2445,7 +2564,7 @@ fs_visitor::compute_to_mrf() if (inst->opcode != BRW_OPCODE_MOV || inst->is_partial_write() || - inst->dst.file != MRF || inst->src[0].file != GRF || + inst->dst.file != MRF || inst->src[0].file != VGRF || inst->dst.type != inst->src[0].type || inst->src[0].abs || inst->src[0].negate || !inst->src[0].is_contiguous() || @@ -2455,9 +2574,9 @@ fs_visitor::compute_to_mrf() /* Work out which hardware MRF registers are written by this * instruction. */ - int mrf_low = inst->dst.reg & ~BRW_MRF_COMPR4; + int mrf_low = inst->dst.nr & ~BRW_MRF_COMPR4; int mrf_high; - if (inst->dst.reg & BRW_MRF_COMPR4) { + if (inst->dst.nr & BRW_MRF_COMPR4) { mrf_high = mrf_low + 4; } else if (inst->exec_size == 16) { mrf_high = mrf_low + 1; @@ -2468,15 +2587,15 @@ fs_visitor::compute_to_mrf() /* Can't compute-to-MRF this GRF if someone else was going to * read it later. */ - if (this->virtual_grf_end[inst->src[0].reg] > ip) + if (this->virtual_grf_end[inst->src[0].nr] > ip) continue; /* Found a move of a GRF to a MRF. Let's see if we can go * rewrite the thing that made this GRF to write into the MRF. */ foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { - if (scan_inst->dst.file == GRF && - scan_inst->dst.reg == inst->src[0].reg) { + if (scan_inst->dst.file == VGRF && + scan_inst->dst.nr == inst->src[0].nr) { /* Found the last thing to write our reg we want to turn * into a compute-to-MRF. */ @@ -2511,7 +2630,7 @@ fs_visitor::compute_to_mrf() if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) { /* Found the creator of our MRF's source value. */ scan_inst->dst.file = MRF; - scan_inst->dst.reg = inst->dst.reg; + scan_inst->dst.nr = inst->dst.nr; scan_inst->saturate |= inst->saturate; inst->remove(block); progress = true; @@ -2531,8 +2650,8 @@ fs_visitor::compute_to_mrf() */ bool interfered = false; for (int i = 0; i < scan_inst->sources; i++) { - if (scan_inst->src[i].file == GRF && - scan_inst->src[i].reg == inst->src[0].reg && + if (scan_inst->src[i].file == VGRF && + scan_inst->src[i].nr == inst->src[0].nr && scan_inst->src[i].reg_offset == inst->src[0].reg_offset) { interfered = true; } @@ -2544,10 +2663,10 @@ fs_visitor::compute_to_mrf() /* If somebody else writes our MRF here, we can't * compute-to-MRF before that. */ - int scan_mrf_low = scan_inst->dst.reg & ~BRW_MRF_COMPR4; + int scan_mrf_low = scan_inst->dst.nr & ~BRW_MRF_COMPR4; int scan_mrf_high; - if (scan_inst->dst.reg & BRW_MRF_COMPR4) { + if (scan_inst->dst.nr & BRW_MRF_COMPR4) { scan_mrf_high = scan_mrf_low + 4; } else if (scan_inst->exec_size == 16) { scan_mrf_high = scan_mrf_low + 1; @@ -2690,8 +2809,8 @@ fs_visitor::emit_repclear_shader() /* Now that we have the uniform assigned, go ahead and force it to a vec4. */ if (uniforms == 1) { - assert(mov->src[0].file == HW_REG); - mov->src[0] = brw_vec4_grf(mov->src[0].fixed_hw_reg.nr, 0); + assert(mov->src[0].file == FIXED_GRF); + mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0); } } @@ -2718,7 +2837,7 @@ fs_visitor::remove_duplicate_mrf_writes() if (inst->opcode == BRW_OPCODE_MOV && inst->dst.file == MRF) { - fs_inst *prev_inst = last_mrf_move[inst->dst.reg]; + fs_inst *prev_inst = last_mrf_move[inst->dst.nr]; if (prev_inst && inst->equals(prev_inst)) { inst->remove(block); progress = true; @@ -2728,7 +2847,7 @@ fs_visitor::remove_duplicate_mrf_writes() /* Clear out the last-write records for MRFs that were overwritten. */ if (inst->dst.file == MRF) { - last_mrf_move[inst->dst.reg] = NULL; + last_mrf_move[inst->dst.nr] = NULL; } if (inst->mlen > 0 && inst->base_mrf != -1) { @@ -2741,10 +2860,10 @@ fs_visitor::remove_duplicate_mrf_writes() } /* Clear out any MRF move records whose sources got overwritten. */ - if (inst->dst.file == GRF) { + if (inst->dst.file == VGRF) { for (unsigned int i = 0; i < ARRAY_SIZE(last_mrf_move); i++) { if (last_mrf_move[i] && - last_mrf_move[i]->src[0].reg == inst->dst.reg) { + last_mrf_move[i]->src[0].nr == inst->dst.nr) { last_mrf_move[i] = NULL; } } @@ -2752,9 +2871,9 @@ fs_visitor::remove_duplicate_mrf_writes() if (inst->opcode == BRW_OPCODE_MOV && inst->dst.file == MRF && - inst->src[0].file == GRF && + inst->src[0].file == VGRF && !inst->is_partial_write()) { - last_mrf_move[inst->dst.reg] = inst; + last_mrf_move[inst->dst.nr] = inst; } } @@ -2770,11 +2889,8 @@ clear_deps_for_inst_src(fs_inst *inst, bool *deps, int first_grf, int grf_len) /* Clear the flag for registers that actually got read (as expected). */ for (int i = 0; i < inst->sources; i++) { int grf; - if (inst->src[i].file == GRF) { - grf = inst->src[i].reg; - } else if (inst->src[i].file == HW_REG && - inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) { - grf = inst->src[i].fixed_hw_reg.nr; + if (inst->src[i].file == VGRF || inst->src[i].file == FIXED_GRF) { + grf = inst->src[i].nr; } else { continue; } @@ -2809,7 +2925,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block, fs_inst *inst) { int write_len = inst->regs_written; - int first_write_grf = inst->dst.reg; + int first_write_grf = inst->dst.nr; bool needs_dep[BRW_MAX_MRF(devinfo->gen)]; assert(write_len < (int)sizeof(needs_dep) - 1); @@ -2840,9 +2956,9 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block, * instruction but a MOV that might have left us an outstanding * dependency has more latency than a MOV. */ - if (scan_inst->dst.file == GRF) { + if (scan_inst->dst.file == VGRF) { for (int i = 0; i < scan_inst->regs_written; i++) { - int reg = scan_inst->dst.reg + i; + int reg = scan_inst->dst.nr + i; if (reg >= first_write_grf && reg < first_write_grf + write_len && @@ -2880,7 +2996,7 @@ void fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_inst *inst) { int write_len = inst->regs_written; - int first_write_grf = inst->dst.reg; + int first_write_grf = inst->dst.nr; bool needs_dep[BRW_MAX_MRF(devinfo->gen)]; assert(write_len < (int)sizeof(needs_dep) - 1); @@ -2906,13 +3022,13 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins /* We insert our reads as late as possible since they're reading the * result of a SEND, which has massive latency. */ - if (scan_inst->dst.file == GRF && - scan_inst->dst.reg >= first_write_grf && - scan_inst->dst.reg < first_write_grf + write_len && - needs_dep[scan_inst->dst.reg - first_write_grf]) { + if (scan_inst->dst.file == VGRF && + scan_inst->dst.nr >= first_write_grf && + scan_inst->dst.nr < first_write_grf + write_len && + needs_dep[scan_inst->dst.nr - first_write_grf]) { DEP_RESOLVE_MOV(fs_builder(this, block, scan_inst), - scan_inst->dst.reg); - needs_dep[scan_inst->dst.reg - first_write_grf] = false; + scan_inst->dst.nr); + needs_dep[scan_inst->dst.nr - first_write_grf] = false; } /* Continue the loop only if we haven't resolved all the dependencies */ @@ -2939,7 +3055,7 @@ fs_visitor::insert_gen4_send_dependency_workarounds() */ foreach_block_and_inst(block, fs_inst, inst, cfg) { - if (inst->mlen != 0 && inst->dst.file == GRF) { + if (inst->mlen != 0 && inst->dst.file == VGRF) { insert_gen4_pre_send_dependency_workarounds(block, inst); insert_gen4_post_send_dependency_workarounds(block, inst); progress = true; @@ -2980,18 +3096,18 @@ fs_visitor::lower_uniform_pull_constant_loads() fs_reg const_offset_reg = inst->src[1]; assert(const_offset_reg.file == IMM && const_offset_reg.type == BRW_REGISTER_TYPE_UD); - const_offset_reg.fixed_hw_reg.dw1.ud /= 4; + const_offset_reg.ud /= 4; fs_reg payload, offset; if (devinfo->gen >= 9) { /* We have to use a message header on Skylake to get SIMD4x2 * mode. Reserve space for the register. */ - offset = payload = fs_reg(GRF, alloc.allocate(2)); + offset = payload = fs_reg(VGRF, alloc.allocate(2)); offset.reg_offset++; inst->mlen = 2; } else { - offset = payload = fs_reg(GRF, alloc.allocate(1)); + offset = payload = fs_reg(VGRF, alloc.allocate(1)); inst->mlen = 1; } @@ -3038,13 +3154,13 @@ fs_visitor::lower_load_payload() if (inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD) continue; - assert(inst->dst.file == MRF || inst->dst.file == GRF); + assert(inst->dst.file == MRF || inst->dst.file == VGRF); assert(inst->saturate == false); fs_reg dst = inst->dst; /* Get rid of COMPR4. We'll add it back in if we need it */ if (dst.file == MRF) - dst.reg = dst.reg & ~BRW_MRF_COMPR4; + dst.nr = dst.nr & ~BRW_MRF_COMPR4; const fs_builder ibld(this, block, inst); const fs_builder hbld = ibld.exec_all().group(8, 0); @@ -3058,7 +3174,7 @@ fs_visitor::lower_load_payload() dst = offset(dst, hbld, 1); } - if (inst->dst.file == MRF && (inst->dst.reg & BRW_MRF_COMPR4) && + if (inst->dst.file == MRF && (inst->dst.nr & BRW_MRF_COMPR4) && inst->exec_size > 8) { /* In this case, the payload portion of the LOAD_PAYLOAD isn't * a straightforward copy. Instead, the result of the @@ -3082,18 +3198,18 @@ fs_visitor::lower_load_payload() if (inst->src[i].file != BAD_FILE) { if (devinfo->has_compr4) { fs_reg compr4_dst = retype(dst, inst->src[i].type); - compr4_dst.reg |= BRW_MRF_COMPR4; + compr4_dst.nr |= BRW_MRF_COMPR4; ibld.MOV(compr4_dst, inst->src[i]); } else { /* Platform doesn't have COMPR4. We have to fake it */ fs_reg mov_dst = retype(dst, inst->src[i].type); ibld.half(0).MOV(mov_dst, half(inst->src[i], 0)); - mov_dst.reg += 4; + mov_dst.nr += 4; ibld.half(1).MOV(mov_dst, half(inst->src[i], 1)); } } - dst.reg++; + dst.nr++; } /* The loop above only ever incremented us through the first set @@ -3101,7 +3217,7 @@ fs_visitor::lower_load_payload() * actually wrote to the first 8 registers, so we need to take * that into account now. */ - dst.reg += 4; + dst.nr += 4; /* The COMPR4 code took care of the first 4 sources. We'll let * the regular path handle any remaining sources. Yes, we are @@ -3149,7 +3265,7 @@ fs_visitor::lower_integer_multiplication() continue; if (inst->src[1].file == IMM && - inst->src[1].fixed_hw_reg.dw1.ud < (1 << 16)) { + inst->src[1].ud < (1 << 16)) { /* The MUL instruction isn't commutative. On Gen <= 6, only the low * 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of * src1 are used. @@ -3158,7 +3274,7 @@ fs_visitor::lower_integer_multiplication() * single MUL instruction with that value in the proper location. */ if (devinfo->gen < 7) { - fs_reg imm(GRF, alloc.allocate(dispatch_width / 8), + fs_reg imm(VGRF, alloc.allocate(dispatch_width / 8), inst->dst.type); ibld.MOV(imm, inst->src[1]); ibld.MUL(inst->dst, imm, inst->src[0]); @@ -3213,11 +3329,11 @@ fs_visitor::lower_integer_multiplication() fs_reg orig_dst = inst->dst; if (orig_dst.is_null() || orig_dst.file == MRF) { - inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8), + inst->dst = fs_reg(VGRF, alloc.allocate(dispatch_width / 8), inst->dst.type); } fs_reg low = inst->dst; - fs_reg high(GRF, alloc.allocate(dispatch_width / 8), + fs_reg high(VGRF, alloc.allocate(dispatch_width / 8), inst->dst.type); if (devinfo->gen >= 7) { @@ -3225,8 +3341,8 @@ fs_visitor::lower_integer_multiplication() fs_reg src1_1_w = inst->src[1]; if (inst->src[1].file == IMM) { - src1_0_w.fixed_hw_reg.dw1.ud &= 0xffff; - src1_1_w.fixed_hw_reg.dw1.ud >>= 16; + src1_0_w.ud &= 0xffff; + src1_1_w.ud >>= 16; } else { src1_0_w.type = BRW_REGISTER_TYPE_UW; if (src1_0_w.stride != 0) { @@ -3381,7 +3497,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, const fs_reg &src_stencil = inst->src[FB_WRITE_LOGICAL_SRC_SRC_STENCIL]; fs_reg sample_mask = inst->src[FB_WRITE_LOGICAL_SRC_OMASK]; const unsigned components = - inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.dw1.ud; + inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].ud; /* We can potentially have a message length of up to 15, so we have to set * base_mrf to either 0 or 1 in order to fit in m0..m15. @@ -3411,7 +3527,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, } if (payload.aa_dest_stencil_reg) { - sources[length] = fs_reg(GRF, bld.shader->alloc.allocate(1)); + sources[length] = fs_reg(VGRF, bld.shader->alloc.allocate(1)); bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha") .MOV(sources[length], fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0))); @@ -3419,7 +3535,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, } if (prog_data->uses_omask) { - sources[length] = fs_reg(GRF, bld.shader->alloc.allocate(1), + sources[length] = fs_reg(VGRF, bld.shader->alloc.allocate(1), BRW_REGISTER_TYPE_UD); /* Hand over gl_SampleMask. Only the lower 16 bits of each channel are @@ -3485,9 +3601,9 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, fs_inst *load; if (devinfo->gen >= 7) { /* Send from the GRF */ - fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F); + fs_reg payload = fs_reg(VGRF, -1, BRW_REGISTER_TYPE_F); load = bld.LOAD_PAYLOAD(payload, sources, length, payload_header_size); - payload.reg = bld.shader->alloc.allocate(load->regs_written); + payload.nr = bld.shader->alloc.allocate(load->regs_written); load->dst = payload; inst->src[0] = payload; @@ -3502,7 +3618,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, * will do this for us if we just give it a COMPR4 destination. */ if (devinfo->gen < 6 && bld.dispatch_width() == 16) - load->dst.reg |= BRW_MRF_COMPR4; + load->dst.nr |= BRW_MRF_COMPR4; inst->resize_sources(0); inst->base_mrf = 1; @@ -3612,8 +3728,8 @@ lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op, inst->src[0] = reg_undef; inst->src[1] = sampler; inst->resize_sources(2); - inst->base_mrf = msg_begin.reg; - inst->mlen = msg_end.reg - msg_begin.reg; + inst->base_mrf = msg_begin.nr; + inst->mlen = msg_end.nr - msg_begin.nr; inst->header_size = 1; } @@ -3637,7 +3753,7 @@ lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op, * go headerless. */ header_size = 1; - message.reg--; + message.nr--; } for (unsigned i = 0; i < coord_components; i++) { @@ -3707,8 +3823,8 @@ lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op, inst->src[0] = reg_undef; inst->src[1] = sampler; inst->resize_sources(2); - inst->base_mrf = message.reg; - inst->mlen = msg_end.reg - message.reg; + inst->base_mrf = message.nr; + inst->mlen = msg_end.nr - message.nr; inst->header_size = header_size; /* Message length > MAX_SAMPLER_MESSAGE_SIZE disallowed by hardware. */ @@ -3721,7 +3837,7 @@ is_high_sampler(const struct brw_device_info *devinfo, const fs_reg &sampler) if (devinfo->gen < 8 && !devinfo->is_haswell) return false; - return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16; + return sampler.file != IMM || sampler.ud >= 16; } static void @@ -3844,17 +3960,31 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, coordinate_done = true; break; case SHADER_OPCODE_TXF_CMS: + case SHADER_OPCODE_TXF_CMS_W: case SHADER_OPCODE_TXF_UMS: case SHADER_OPCODE_TXF_MCS: - if (op == SHADER_OPCODE_TXF_UMS || op == SHADER_OPCODE_TXF_CMS) { + if (op == SHADER_OPCODE_TXF_UMS || + op == SHADER_OPCODE_TXF_CMS || + op == SHADER_OPCODE_TXF_CMS_W) { bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index); length++; } - if (op == SHADER_OPCODE_TXF_CMS) { + if (op == SHADER_OPCODE_TXF_CMS || op == SHADER_OPCODE_TXF_CMS_W) { /* Data from the multisample control surface. */ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs); length++; + + /* On Gen9+ we'll use ld2dms_w instead which has two registers for + * the MCS data. + */ + if (op == SHADER_OPCODE_TXF_CMS_W) { + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), + mcs.file == IMM ? + mcs : + offset(mcs, bld, 1)); + length++; + } } /* There is no offsetting for this message; just copy in the integer @@ -3912,7 +4042,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, else mlen = length * reg_width; - const fs_reg src_payload = fs_reg(GRF, bld.shader->alloc.allocate(mlen), + const fs_reg src_payload = fs_reg(VGRF, bld.shader->alloc.allocate(mlen), BRW_REGISTER_TYPE_F); bld.LOAD_PAYLOAD(src_payload, sources, length, header_size); @@ -3942,8 +4072,8 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op) const fs_reg &sampler = inst->src[6]; const fs_reg &offset_value = inst->src[7]; assert(inst->src[8].file == IMM && inst->src[9].file == IMM); - const unsigned coord_components = inst->src[8].fixed_hw_reg.dw1.ud; - const unsigned grad_components = inst->src[9].fixed_hw_reg.dw1.ud; + const unsigned coord_components = inst->src[8].ud; + const unsigned grad_components = inst->src[9].ud; if (devinfo->gen >= 7) { lower_sampler_logical_send_gen7(bld, inst, op, coordinate, @@ -4068,6 +4198,10 @@ fs_visitor::lower_logical_sends() lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_CMS); break; + case SHADER_OPCODE_TXF_CMS_W_LOGICAL: + lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_CMS_W); + break; + case SHADER_OPCODE_TXF_UMS_LOGICAL: lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_UMS); break; @@ -4260,6 +4394,21 @@ get_lowered_simd_width(const struct brw_device_info *devinfo, else return inst->exec_size; + case SHADER_OPCODE_TXF_CMS_W_LOGICAL: { + /* This opcode can take up to 6 arguments which means that in some + * circumstances it can end up with a message that is too long in SIMD16 + * mode. + */ + const unsigned coord_components = inst->src[8].ud; + /* First three arguments are the sample index and the two arguments for + * the MCS data. + */ + if ((coord_components + 3) * 2 > MAX_SAMPLER_MESSAGE_SIZE) + return 8; + else + return inst->exec_size; + } + case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: @@ -4473,51 +4622,48 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) } switch (inst->dst.file) { - case GRF: - fprintf(file, "vgrf%d", inst->dst.reg); - if (alloc.sizes[inst->dst.reg] != inst->regs_written || + case VGRF: + fprintf(file, "vgrf%d", inst->dst.nr); + if (alloc.sizes[inst->dst.nr] != inst->regs_written || inst->dst.subreg_offset) fprintf(file, "+%d.%d", inst->dst.reg_offset, inst->dst.subreg_offset); break; + case FIXED_GRF: + fprintf(file, "g%d", inst->dst.nr); + break; case MRF: - fprintf(file, "m%d", inst->dst.reg); + fprintf(file, "m%d", inst->dst.nr); break; case BAD_FILE: fprintf(file, "(null)"); break; case UNIFORM: - fprintf(file, "***u%d***", inst->dst.reg + inst->dst.reg_offset); + fprintf(file, "***u%d***", inst->dst.nr + inst->dst.reg_offset); break; case ATTR: - fprintf(file, "***attr%d***", inst->dst.reg + inst->dst.reg_offset); + fprintf(file, "***attr%d***", inst->dst.nr + inst->dst.reg_offset); break; - case HW_REG: - if (inst->dst.fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE) { - switch (inst->dst.fixed_hw_reg.nr) { - case BRW_ARF_NULL: - fprintf(file, "null"); - break; - case BRW_ARF_ADDRESS: - fprintf(file, "a0.%d", inst->dst.fixed_hw_reg.subnr); - break; - case BRW_ARF_ACCUMULATOR: - fprintf(file, "acc%d", inst->dst.fixed_hw_reg.subnr); - break; - case BRW_ARF_FLAG: - fprintf(file, "f%d.%d", inst->dst.fixed_hw_reg.nr & 0xf, - inst->dst.fixed_hw_reg.subnr); - break; - default: - fprintf(file, "arf%d.%d", inst->dst.fixed_hw_reg.nr & 0xf, - inst->dst.fixed_hw_reg.subnr); - break; - } - } else { - fprintf(file, "hw_reg%d", inst->dst.fixed_hw_reg.nr); + case ARF: + switch (inst->dst.nr) { + case BRW_ARF_NULL: + fprintf(file, "null"); + break; + case BRW_ARF_ADDRESS: + fprintf(file, "a0.%d", inst->dst.subnr); + break; + case BRW_ARF_ACCUMULATOR: + fprintf(file, "acc%d", inst->dst.subnr); + break; + case BRW_ARF_FLAG: + fprintf(file, "f%d.%d", inst->dst.nr & 0xf, inst->dst.subnr); + break; + default: + fprintf(file, "arf%d.%d", inst->dst.nr & 0xf, inst->dst.subnr); + break; } - if (inst->dst.fixed_hw_reg.subnr) - fprintf(file, "+%d", inst->dst.fixed_hw_reg.subnr); + if (inst->dst.subnr) + fprintf(file, "+%d", inst->dst.subnr); break; case IMM: unreachable("not reached"); @@ -4530,21 +4676,24 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) if (inst->src[i].abs) fprintf(file, "|"); switch (inst->src[i].file) { - case GRF: - fprintf(file, "vgrf%d", inst->src[i].reg); - if (alloc.sizes[inst->src[i].reg] != (unsigned)inst->regs_read(i) || + case VGRF: + fprintf(file, "vgrf%d", inst->src[i].nr); + if (alloc.sizes[inst->src[i].nr] != (unsigned)inst->regs_read(i) || inst->src[i].subreg_offset) fprintf(file, "+%d.%d", inst->src[i].reg_offset, inst->src[i].subreg_offset); break; + case FIXED_GRF: + fprintf(file, "g%d", inst->src[i].nr); + break; case MRF: - fprintf(file, "***m%d***", inst->src[i].reg); + fprintf(file, "***m%d***", inst->src[i].nr); break; case ATTR: - fprintf(file, "attr%d+%d", inst->src[i].reg, inst->src[i].reg_offset); + fprintf(file, "attr%d+%d", inst->src[i].nr, inst->src[i].reg_offset); break; case UNIFORM: - fprintf(file, "u%d", inst->src[i].reg + inst->src[i].reg_offset); + fprintf(file, "u%d", inst->src[i].nr + inst->src[i].reg_offset); if (inst->src[i].reladdr) { fprintf(file, "+reladdr"); } else if (inst->src[i].subreg_offset) { @@ -4558,60 +4707,48 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) case IMM: switch (inst->src[i].type) { case BRW_REGISTER_TYPE_F: - fprintf(file, "%ff", inst->src[i].fixed_hw_reg.dw1.f); + fprintf(file, "%ff", inst->src[i].f); break; case BRW_REGISTER_TYPE_W: case BRW_REGISTER_TYPE_D: - fprintf(file, "%dd", inst->src[i].fixed_hw_reg.dw1.d); + fprintf(file, "%dd", inst->src[i].d); break; case BRW_REGISTER_TYPE_UW: case BRW_REGISTER_TYPE_UD: - fprintf(file, "%uu", inst->src[i].fixed_hw_reg.dw1.ud); + fprintf(file, "%uu", inst->src[i].ud); break; case BRW_REGISTER_TYPE_VF: fprintf(file, "[%-gF, %-gF, %-gF, %-gF]", - brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 0) & 0xff), - brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 8) & 0xff), - brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 16) & 0xff), - brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 24) & 0xff)); + brw_vf_to_float((inst->src[i].ud >> 0) & 0xff), + brw_vf_to_float((inst->src[i].ud >> 8) & 0xff), + brw_vf_to_float((inst->src[i].ud >> 16) & 0xff), + brw_vf_to_float((inst->src[i].ud >> 24) & 0xff)); break; default: fprintf(file, "???"); break; } break; - case HW_REG: - if (inst->src[i].fixed_hw_reg.negate) - fprintf(file, "-"); - if (inst->src[i].fixed_hw_reg.abs) - fprintf(file, "|"); - if (inst->src[i].fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE) { - switch (inst->src[i].fixed_hw_reg.nr) { - case BRW_ARF_NULL: - fprintf(file, "null"); - break; - case BRW_ARF_ADDRESS: - fprintf(file, "a0.%d", inst->src[i].fixed_hw_reg.subnr); - break; - case BRW_ARF_ACCUMULATOR: - fprintf(file, "acc%d", inst->src[i].fixed_hw_reg.subnr); - break; - case BRW_ARF_FLAG: - fprintf(file, "f%d.%d", inst->src[i].fixed_hw_reg.nr & 0xf, - inst->src[i].fixed_hw_reg.subnr); - break; - default: - fprintf(file, "arf%d.%d", inst->src[i].fixed_hw_reg.nr & 0xf, - inst->src[i].fixed_hw_reg.subnr); - break; - } - } else { - fprintf(file, "hw_reg%d", inst->src[i].fixed_hw_reg.nr); + case ARF: + switch (inst->src[i].nr) { + case BRW_ARF_NULL: + fprintf(file, "null"); + break; + case BRW_ARF_ADDRESS: + fprintf(file, "a0.%d", inst->src[i].subnr); + break; + case BRW_ARF_ACCUMULATOR: + fprintf(file, "acc%d", inst->src[i].subnr); + break; + case BRW_ARF_FLAG: + fprintf(file, "f%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr); + break; + default: + fprintf(file, "arf%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr); + break; } - if (inst->src[i].fixed_hw_reg.subnr) - fprintf(file, "+%d", inst->src[i].fixed_hw_reg.subnr); - if (inst->src[i].fixed_hw_reg.abs) - fprintf(file, "|"); + if (inst->src[i].subnr) + fprintf(file, "+%d", inst->src[i].subnr); break; } if (inst->src[i].abs) @@ -4627,6 +4764,9 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) fprintf(file, " "); + if (inst->force_writemask_all) + fprintf(file, "NoMask "); + if (dispatch_width == 16 && inst->exec_size == 8) { if (inst->force_sechalf) fprintf(file, "2ndhalf "); @@ -4779,6 +4919,45 @@ fs_visitor::setup_vs_payload() * */ void +fs_visitor::setup_gs_payload() +{ + assert(stage == MESA_SHADER_GEOMETRY); + + struct brw_gs_prog_data *gs_prog_data = + (struct brw_gs_prog_data *) prog_data; + struct brw_vue_prog_data *vue_prog_data = + (struct brw_vue_prog_data *) prog_data; + + /* R0: thread header, R1: output URB handles */ + payload.num_regs = 2; + + if (gs_prog_data->include_primitive_id) { + /* R2: Primitive ID 0..7 */ + payload.num_regs++; + } + + /* Use a maximum of 32 registers for push-model inputs. */ + const unsigned max_push_components = 32; + + /* If pushing our inputs would take too many registers, reduce the URB read + * length (which is in HWords, or 8 registers), and resort to pulling. + * + * Note that the GS reads <URB Read Length> HWords for every vertex - so we + * have to multiply by VerticesIn to obtain the total storage requirement. + */ + if (8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in > + max_push_components) { + gs_prog_data->base.include_vue_handles = true; + + /* R3..RN: ICP Handles for each incoming vertex (when using pull model) */ + payload.num_regs += nir->info.gs.vertices_in; + + vue_prog_data->urb_read_length = + ROUND_DOWN_TO(max_push_components / nir->info.gs.vertices_in, 8) / 8; + } +} + +void fs_visitor::setup_cs_payload() { assert(devinfo->gen >= 7); @@ -4925,7 +5104,7 @@ fs_visitor::fixup_3src_null_dest() { foreach_block_and_inst_safe (block, fs_inst, inst, cfg) { if (inst->is_3src() && inst->dst.is_null()) { - inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8), + inst->dst = fs_reg(VGRF, alloc.allocate(dispatch_width / 8), inst->dst.type); } } @@ -5035,6 +5214,55 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes) } bool +fs_visitor::run_gs() +{ + assert(stage == MESA_SHADER_GEOMETRY); + + setup_gs_payload(); + + this->final_gs_vertex_count = vgrf(glsl_type::uint_type); + + if (gs_compile->control_data_header_size_bits > 0) { + /* Create a VGRF to store accumulated control data bits. */ + this->control_data_bits = vgrf(glsl_type::uint_type); + + /* If we're outputting more than 32 control data bits, then EmitVertex() + * will set control_data_bits to 0 after emitting the first vertex. + * Otherwise, we need to initialize it to 0 here. + */ + if (gs_compile->control_data_header_size_bits <= 32) { + const fs_builder abld = bld.annotate("initialize control data bits"); + abld.MOV(this->control_data_bits, fs_reg(0u)); + } + } + + if (shader_time_index >= 0) + emit_shader_time_begin(); + + emit_nir_code(); + + emit_gs_thread_end(); + + if (shader_time_index >= 0) + emit_shader_time_end(); + + if (failed) + return false; + + calculate_cfg(); + + optimize(); + + assign_curb_setup(); + assign_gs_urb_setup(); + + fixup_3src_null_dest(); + allocate_registers(); + + return !failed; +} + +bool fs_visitor::run_fs(bool do_rep_send) { brw_wm_prog_data *wm_prog_data = (brw_wm_prog_data *) this->prog_data; |