diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 56 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 16 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_live_variables.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 20 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp | 16 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_validate.cpp | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 12 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_ir_fs.h | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 15 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_shader.cpp | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_shader.h | 7 |
17 files changed, 94 insertions, 82 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 75642d3..8401798 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -354,7 +354,7 @@ fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const return false; fs_reg reg = this->src[0]; - if (reg.file != VGRF || reg.reg_offset != 0 || reg.stride == 0) + if (reg.file != VGRF || reg.offset / REG_SIZE != 0 || reg.stride == 0) return false; if (grf_alloc.sizes[reg.nr] != this->regs_written) @@ -366,7 +366,7 @@ fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const return false; if (i < this->header_size) { - reg.reg_offset += 1; + reg.offset += REG_SIZE; } else { reg = horiz_offset(reg, this->exec_size); } @@ -425,7 +425,7 @@ fs_reg::fs_reg() fs_reg::fs_reg(struct ::brw_reg reg) : backend_reg(reg) { - this->reg_offset = 0; + this->offset = 0; this->subreg_offset = 0; this->stride = 1; if (this->file == IMM && @@ -1463,7 +1463,7 @@ fs_visitor::assign_curb_setup() foreach_block_and_inst(block, fs_inst, inst, cfg) { for (unsigned int i = 0; i < inst->sources; i++) { if (inst->src[i].file == UNIFORM) { - int uniform_nr = inst->src[i].nr + inst->src[i].reg_offset; + int uniform_nr = inst->src[i].nr + inst->src[i].offset / 4; int constant_nr; if (uniform_nr >= 0 && uniform_nr < (int) uniforms) { constant_nr = push_constant_loc[uniform_nr]; @@ -1620,7 +1620,7 @@ fs_visitor::convert_attr_sources_to_hw_regs(fs_inst *inst) int grf = payload.num_regs + prog_data->curb_read_length + inst->src[i].nr + - inst->src[i].reg_offset; + inst->src[i].offset / REG_SIZE; /* As explained at brw_reg_from_fs_reg, From the Haswell PRM: * @@ -1773,13 +1773,13 @@ fs_visitor::split_virtual_grfs() foreach_block_and_inst(block, fs_inst, inst, cfg) { if (inst->dst.file == VGRF) { - int reg = vgrf_to_reg[inst->dst.nr] + inst->dst.reg_offset; + int reg = vgrf_to_reg[inst->dst.nr] + inst->dst.offset / REG_SIZE; for (int j = 1; j < 
inst->regs_written; j++) split_points[reg + j] = false; } for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) { - int reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].reg_offset; + int reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].offset / REG_SIZE; for (int j = 1; j < inst->regs_read(i); j++) split_points[reg + j] = false; } @@ -1826,16 +1826,18 @@ fs_visitor::split_virtual_grfs() foreach_block_and_inst(block, fs_inst, inst, cfg) { if (inst->dst.file == VGRF) { - reg = vgrf_to_reg[inst->dst.nr] + inst->dst.reg_offset; + reg = vgrf_to_reg[inst->dst.nr] + inst->dst.offset / REG_SIZE; inst->dst.nr = new_virtual_grf[reg]; - inst->dst.reg_offset = new_reg_offset[reg]; + inst->dst.offset = new_reg_offset[reg] * REG_SIZE + + inst->dst.offset % REG_SIZE; assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]); } for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) { - reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].reg_offset; + reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].offset / REG_SIZE; inst->src[i].nr = new_virtual_grf[reg]; - inst->src[i].reg_offset = new_reg_offset[reg]; + inst->src[i].offset = new_reg_offset[reg] * REG_SIZE + + inst->src[i].offset % REG_SIZE; assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]); } } @@ -2001,7 +2003,7 @@ fs_visitor::assign_constant_locations() if (inst->src[i].file != UNIFORM) continue; - int constant_nr = inst->src[i].nr + inst->src[i].reg_offset; + int constant_nr = inst->src[i].nr + inst->src[i].offset / 4; if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) { assert(inst->src[2].ud % 4 == 0); @@ -2155,7 +2157,7 @@ fs_visitor::lower_constant_loads() if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) continue; - unsigned location = inst->src[i].nr + inst->src[i].reg_offset; + unsigned location = inst->src[i].nr + inst->src[i].offset / 4; if (location >= uniforms) continue; /* Out of bounds access */ @@ -2182,7 +2184,7 @@ 
fs_visitor::lower_constant_loads() /* Rewrite the instruction to use the temporary VGRF. */ inst->src[i].file = VGRF; inst->src[i].nr = dst.nr; - inst->src[i].reg_offset = 0; + inst->src[i].offset %= 4; inst->src[i].set_smear((pull_index & 3) * 4 / type_sz(inst->src[i].type)); @@ -2192,7 +2194,7 @@ fs_visitor::lower_constant_loads() if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && inst->src[0].file == UNIFORM) { - unsigned location = inst->src[0].nr + inst->src[0].reg_offset; + unsigned location = inst->src[0].nr + inst->src[0].offset / 4; if (location >= uniforms) continue; /* Out of bounds access */ @@ -2748,9 +2750,9 @@ fs_visitor::compute_to_mrf() * would need us to understand coalescing out more than one MOV at * a time. */ - if (scan_inst->dst.reg_offset < inst->src[0].reg_offset || - scan_inst->dst.reg_offset + scan_inst->regs_written > - inst->src[0].reg_offset + inst->regs_read(0)) + if (scan_inst->dst.offset / REG_SIZE < inst->src[0].offset / REG_SIZE || + scan_inst->dst.offset / REG_SIZE + scan_inst->regs_written > + inst->src[0].offset / REG_SIZE + inst->regs_read(0)) break; /* SEND instructions can't have MRF as a destination. */ @@ -2852,7 +2854,7 @@ fs_visitor::compute_to_mrf() } scan_inst->dst.file = MRF; - scan_inst->dst.reg_offset = 0; + scan_inst->dst.offset %= REG_SIZE; scan_inst->saturate |= inst->saturate; if (!regs_left) break; @@ -3264,7 +3266,7 @@ fs_visitor::lower_uniform_pull_constant_loads() * mode. Reserve space for the register. 
*/ offset = payload = fs_reg(VGRF, alloc.allocate(2)); - offset.reg_offset++; + offset.offset += REG_SIZE; inst->mlen = 2; } else { offset = payload = fs_reg(VGRF, alloc.allocate(1)); @@ -5317,7 +5319,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) if (alloc.sizes[inst->dst.nr] != inst->regs_written || inst->dst.subreg_offset) fprintf(file, "+%d.%d", - inst->dst.reg_offset, inst->dst.subreg_offset); + inst->dst.offset / REG_SIZE, inst->dst.subreg_offset); break; case FIXED_GRF: fprintf(file, "g%d", inst->dst.nr); @@ -5329,10 +5331,10 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) fprintf(file, "(null)"); break; case UNIFORM: - fprintf(file, "***u%d***", inst->dst.nr + inst->dst.reg_offset); + fprintf(file, "***u%d***", inst->dst.nr + inst->dst.offset / 4); break; case ATTR: - fprintf(file, "***attr%d***", inst->dst.nr + inst->dst.reg_offset); + fprintf(file, "***attr%d***", inst->dst.nr + inst->dst.offset / REG_SIZE); break; case ARF: switch (inst->dst.nr) { @@ -5372,7 +5374,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) fprintf(file, "vgrf%d", inst->src[i].nr); if (alloc.sizes[inst->src[i].nr] != (unsigned)inst->regs_read(i) || inst->src[i].subreg_offset) - fprintf(file, "+%d.%d", inst->src[i].reg_offset, + fprintf(file, "+%d.%d", inst->src[i].offset / REG_SIZE, inst->src[i].subreg_offset); break; case FIXED_GRF: @@ -5382,12 +5384,12 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) fprintf(file, "***m%d***", inst->src[i].nr); break; case ATTR: - fprintf(file, "attr%d+%d", inst->src[i].nr, inst->src[i].reg_offset); + fprintf(file, "attr%d+%d", inst->src[i].nr, inst->src[i].offset / REG_SIZE); break; case UNIFORM: - fprintf(file, "u%d", inst->src[i].nr + inst->src[i].reg_offset); + fprintf(file, "u%d", inst->src[i].nr + inst->src[i].offset / 4); if (inst->src[i].subreg_offset) { - fprintf(file, "+%d.%d", inst->src[i].reg_offset, + fprintf(file, "+%d.%d", 
inst->src[i].offset / 4, inst->src[i].subreg_offset); } break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp index 3b123a7..7607d20 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp @@ -90,7 +90,8 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block) foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { if (scan_inst->overwrites_reg(inst->src[0])) { if (scan_inst->is_partial_write() || - scan_inst->dst.reg_offset != inst->src[0].reg_offset || + scan_inst->dst.offset / REG_SIZE != + inst->src[0].offset / REG_SIZE || scan_inst->exec_size != inst->exec_size) break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 518827b..09d0a4e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -341,9 +341,8 @@ region_contained_in(const fs_reg &src, unsigned regs_read, const fs_reg &dst, unsigned regs_written) { return src.file == dst.file && src.nr == dst.nr && - (src.reg_offset * REG_SIZE + src.subreg_offset >= - dst.reg_offset * REG_SIZE + dst.subreg_offset) && - src.reg_offset + regs_read <= dst.reg_offset + regs_written; + (src.offset + src.subreg_offset >= dst.offset + dst.subreg_offset) && + src.offset / REG_SIZE + regs_read <= dst.offset / REG_SIZE + regs_written; } bool @@ -462,8 +461,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) inst->saturate = inst->saturate || entry->saturate; /* Compute the offset of inst->src[arg] relative to entry->dst */ - const unsigned rel_offset = (inst->src[arg].reg_offset - - entry->dst.reg_offset) * REG_SIZE + + const unsigned rel_offset = inst->src[arg].offset - entry->dst.offset + inst->src[arg].subreg_offset; /* Compute the first component of the copy that the 
instruction is @@ -484,8 +482,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) */ const unsigned offset = suboffset + component * entry->src.stride * type_sz(entry->src.type) + - entry->src.reg_offset * reg_size + entry->src.subreg_offset; - inst->src[arg].reg_offset = offset / reg_size; + entry->src.offset + entry->src.subreg_offset; + inst->src[arg].offset = ROUND_DOWN_TO(offset, reg_size); inst->src[arg].subreg_offset = offset % reg_size; if (has_source_modifiers) { @@ -747,7 +745,7 @@ can_propagate_from(fs_inst *inst) inst->dst.file == VGRF && ((inst->src[0].file == VGRF && (inst->src[0].nr != inst->dst.nr || - inst->src[0].reg_offset != inst->dst.reg_offset)) || + inst->src[0].offset / REG_SIZE != inst->dst.offset / REG_SIZE)) || inst->src[0].file == ATTR || inst->src[0].file == UNIFORM || inst->src[0].file == IMM) && @@ -824,7 +822,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block, if (inst->src[i].file == VGRF) { acp_entry *entry = ralloc(copy_prop_ctx, acp_entry); entry->dst = inst->dst; - entry->dst.reg_offset += offset; + entry->dst.offset += offset * REG_SIZE; entry->src = inst->src[i]; entry->regs_written = regs_written; entry->regs_read = inst->regs_read(i); diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 0c769dd..0c65c5b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -221,7 +221,7 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate) payload = ralloc_array(bld.shader->mem_ctx, fs_reg, sources); for (int i = 0; i < header_size; i++) { payload[i] = src; - src.reg_offset++; + src.offset += REG_SIZE; } for (int i = header_size; i < sources; i++) { payload[i] = src; diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 2f4ba7b..2a45c37 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -56,7 +56,7 @@ brw_file_from_reg(fs_reg *reg) static struct brw_reg brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen, bool compressed) { - assert(reg->reg_offset == 0); + assert(reg->offset / REG_SIZE == 0); struct brw_reg brw_reg; switch (reg->file) { diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp index 8bd4229..02dc777 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp @@ -120,7 +120,7 @@ fs_live_variables::setup_def_use() for (int j = 0; j < inst->regs_read(i); j++) { setup_one_read(bd, inst, ip, reg); - reg.reg_offset++; + reg.offset += REG_SIZE; } } @@ -131,7 +131,7 @@ fs_live_variables::setup_def_use() fs_reg reg = inst->dst; for (int j = 0; j < inst->regs_written; j++) { setup_one_write(bd, inst, ip, reg); - reg.reg_offset++; + reg.offset += REG_SIZE; } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.h b/src/mesa/drivers/dri/i965/brw_fs_live_variables.h index 96cadea..91d1e42 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.h +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.h @@ -68,7 +68,7 @@ public: bool vars_interfere(int a, int b); int var_from_reg(const fs_reg &reg) const { - return var_from_vgrf[reg.nr] + reg.reg_offset; + return var_from_vgrf[reg.nr] + reg.offset / REG_SIZE; } /** Map from virtual GRF number to index in block_data arrays. 
*/ diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index eae726d..a0d3187 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -3999,7 +3999,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr if (const_offset) { /* Offsets are in bytes but they should always be multiples of 4 */ assert(const_offset->u32[0] % 4 == 0); - src.reg_offset = const_offset->u32[0] / 4; + src.offset = const_offset->u32[0] + src.offset % 4; for (unsigned j = 0; j < instr->num_components; j++) { bld.MOV(offset(dest, bld, j), offset(src, bld, j)); diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 5576163..82adaa3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -36,8 +36,8 @@ static void assign_reg(unsigned *reg_hw_locations, fs_reg *reg) { if (reg->file == VGRF) { - reg->nr = reg_hw_locations[reg->nr] + reg->reg_offset; - reg->reg_offset = 0; + reg->nr = reg_hw_locations[reg->nr] + reg->offset / REG_SIZE; + reg->offset %= REG_SIZE; } } @@ -780,7 +780,7 @@ emit_unspill(const fs_builder &bld, fs_reg dst, unspill_inst->mlen = 1; /* header contains offset */ } - dst.reg_offset += reg_size; + dst.offset += reg_size * REG_SIZE; spill_offset += reg_size * REG_SIZE; } } @@ -796,7 +796,7 @@ emit_spill(const fs_builder &bld, fs_reg src, for (unsigned i = 0; i < count / reg_size; i++) { fs_inst *spill_inst = bld.emit(SHADER_OPCODE_GEN4_SCRATCH_WRITE, bld.null_reg_f(), src); - src.reg_offset += reg_size; + src.offset += reg_size * REG_SIZE; spill_inst->offset = spill_offset + i * reg_size * REG_SIZE; spill_inst->mlen = 1 + reg_size; /* header, value */ spill_inst->base_mrf = spill_base_mrf(bld.shader); @@ -904,12 +904,12 @@ fs_visitor::spill_reg(int spill_reg) if (inst->src[i].file == VGRF && inst->src[i].nr == spill_reg) { int 
regs_read = inst->regs_read(i); - int subset_spill_offset = (spill_offset + - REG_SIZE * inst->src[i].reg_offset); + int subset_spill_offset = spill_offset + + ROUND_DOWN_TO(inst->src[i].offset, REG_SIZE); fs_reg unspill_dst(VGRF, alloc.allocate(regs_read)); inst->src[i].nr = unspill_dst.nr; - inst->src[i].reg_offset = 0; + inst->src[i].offset %= REG_SIZE; /* We read the largest power-of-two divisor of the register count * (because only POT scratch read blocks are allowed by the @@ -932,12 +932,12 @@ fs_visitor::spill_reg(int spill_reg) if (inst->dst.file == VGRF && inst->dst.nr == spill_reg) { - int subset_spill_offset = (spill_offset + - REG_SIZE * inst->dst.reg_offset); + int subset_spill_offset = spill_offset + + ROUND_DOWN_TO(inst->dst.offset, REG_SIZE); fs_reg spill_src(VGRF, alloc.allocate(inst->regs_written)); inst->dst.nr = spill_src.nr; - inst->dst.reg_offset = 0; + inst->dst.offset %= REG_SIZE; /* If we're immediately spilling the register, we should not use * destination dependency hints. Doing so will cause the GPU do diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp index 4578ad5..651c136 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp @@ -50,7 +50,7 @@ is_nop_mov(const fs_inst *inst) if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) { fs_reg dst = inst->dst; for (int i = 0; i < inst->sources; i++) { - dst.reg_offset = i; + dst.offset = i * REG_SIZE + dst.offset % REG_SIZE; if (!dst.equals(inst->src[i])) { return false; } @@ -192,7 +192,7 @@ fs_visitor::register_coalesce() mov[0] = inst; channels_remaining -= inst->regs_written; } else { - const int offset = inst->src[0].reg_offset; + const int offset = inst->src[0].offset / REG_SIZE; if (mov[offset]) { /* This is the second time that this offset in the register has * been set. 
This means, in particular, that inst->dst was @@ -203,9 +203,9 @@ fs_visitor::register_coalesce() channels_remaining = -1; continue; } - dst_reg_offset[offset] = inst->dst.reg_offset; + dst_reg_offset[offset] = inst->dst.offset / REG_SIZE; if (inst->regs_written > 1) - dst_reg_offset[offset + 1] = inst->dst.reg_offset + 1; + dst_reg_offset[offset + 1] = inst->dst.offset / REG_SIZE + 1; mov[offset] = inst; channels_remaining -= inst->regs_written; } @@ -253,16 +253,16 @@ fs_visitor::register_coalesce() if (scan_inst->dst.file == VGRF && scan_inst->dst.nr == src_reg) { scan_inst->dst.nr = dst_reg; - scan_inst->dst.reg_offset = - dst_reg_offset[scan_inst->dst.reg_offset]; + scan_inst->dst.offset = scan_inst->dst.offset % REG_SIZE + + dst_reg_offset[scan_inst->dst.offset / REG_SIZE] * REG_SIZE; } for (int j = 0; j < scan_inst->sources; j++) { if (scan_inst->src[j].file == VGRF && scan_inst->src[j].nr == src_reg) { scan_inst->src[j].nr = dst_reg; - scan_inst->src[j].reg_offset = - dst_reg_offset[scan_inst->src[j].reg_offset]; + scan_inst->src[j].offset = scan_inst->src[j].offset % REG_SIZE + + dst_reg_offset[scan_inst->src[j].offset / REG_SIZE] * REG_SIZE; } } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp index f59fdbd..60bb1c0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp @@ -116,7 +116,8 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block) for (int i = 0; i < scan_inst->sources; i++) { if (scan_inst->src[i].file == VGRF && scan_inst->src[i].nr == inst->src[0].nr && - scan_inst->src[i].reg_offset == inst->src[0].reg_offset) { + scan_inst->src[i].offset / REG_SIZE == + inst->src[0].offset / REG_SIZE) { if (scan_inst->opcode != BRW_OPCODE_MOV || !scan_inst->saturate || scan_inst->src[0].abs || diff --git a/src/mesa/drivers/dri/i965/brw_fs_validate.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp index 90edd02..10ad7c3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_validate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp @@ -43,13 +43,13 @@ fs_visitor::validate() { foreach_block_and_inst (block, fs_inst, inst, cfg) { if (inst->dst.file == VGRF) { - fsv_assert(inst->dst.reg_offset + inst->regs_written <= + fsv_assert(inst->dst.offset / REG_SIZE + inst->regs_written <= alloc.sizes[inst->dst.nr]); } for (unsigned i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) { - fsv_assert(inst->src[i].reg_offset + inst->regs_read(i) <= + fsv_assert(inst->src[i].offset / REG_SIZE + inst->regs_read(i) <= (int)alloc.sizes[inst->src[i].nr]); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 48b5f40..d0f504c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -43,21 +43,21 @@ fs_visitor::emit_vs_system_value(int location) switch (location) { case SYSTEM_VALUE_BASE_VERTEX: - reg->reg_offset = 0; + reg->offset = 0; vs_prog_data->uses_basevertex = true; break; case SYSTEM_VALUE_BASE_INSTANCE: - reg->reg_offset = 1; + reg->offset = REG_SIZE; vs_prog_data->uses_baseinstance = true; break; case SYSTEM_VALUE_VERTEX_ID: unreachable("should have been lowered"); case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: - reg->reg_offset = 2; + reg->offset = 2 * REG_SIZE; vs_prog_data->uses_vertexid = true; break; case SYSTEM_VALUE_INSTANCE_ID: - reg->reg_offset = 3; + reg->offset = 3 * REG_SIZE; vs_prog_data->uses_instanceid = true; break; case SYSTEM_VALUE_DRAW_ID: @@ -67,7 +67,7 @@ fs_visitor::emit_vs_system_value(int location) BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) | BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) reg->nr += 4; - reg->reg_offset = 0; + reg->offset = 0; vs_prog_data->uses_drawid = true; break; default: @@ -574,7 +574,7 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes) for 
(int i = 0; i < key->nr_userclip_plane_consts; i++) { fs_reg u = userplane[i]; fs_reg output = outputs[VARYING_SLOT_CLIP_DIST0 + i / 4]; - output.reg_offset = i & 3; + output.offset = output.offset % REG_SIZE + (i & 3) * REG_SIZE; abld.MUL(output, outputs[clip_vertex], u); for (int j = 1; j < 4; j++) { diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index dd5f82a..10da31e 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -88,7 +88,7 @@ byte_offset(fs_reg reg, unsigned delta) case UNIFORM: { const unsigned reg_size = (reg.file == UNIFORM ? 4 : REG_SIZE); const unsigned suboffset = reg.subreg_offset + delta; - reg.reg_offset += suboffset / reg_size; + reg.offset += ROUND_DOWN_TO(suboffset, reg_size); reg.subreg_offset = suboffset % reg_size; break; } @@ -192,8 +192,8 @@ reg_space(const fs_reg &r) static inline unsigned reg_offset(const fs_reg &r) { - return ((r.file == VGRF || r.file == IMM ? 0 : r.nr) + r.reg_offset) * - (r.file == UNIFORM ? 4 : REG_SIZE) + r.subreg_offset; + return (r.file == VGRF || r.file == IMM ? 0 : r.nr) * + (r.file == UNIFORM ? 
4 : REG_SIZE) + r.offset + r.subreg_offset; } /** diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index ff733c8..dde7554 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -1008,7 +1008,8 @@ fs_instruction_scheduler::calculate_deps() add_dep(last_grf_write[inst->src[i].nr + r], n); } else { for (int r = 0; r < inst->regs_read(i); r++) { - add_dep(last_grf_write[inst->src[i].nr * 16 + inst->src[i].reg_offset + r], n); + add_dep(last_grf_write[inst->src[i].nr * 16 + + inst->src[i].offset / REG_SIZE + r], n); } } } else if (inst->src[i].file == FIXED_GRF) { @@ -1057,8 +1058,10 @@ fs_instruction_scheduler::calculate_deps() } } else { for (int r = 0; r < inst->regs_written; r++) { - add_dep(last_grf_write[inst->dst.nr * 16 + inst->dst.reg_offset + r], n); - last_grf_write[inst->dst.nr * 16 + inst->dst.reg_offset + r] = n; + add_dep(last_grf_write[inst->dst.nr * 16 + + inst->dst.offset / REG_SIZE + r], n); + last_grf_write[inst->dst.nr * 16 + + inst->dst.offset / REG_SIZE + r] = n; } } } else if (inst->dst.file == MRF) { @@ -1131,7 +1134,8 @@ fs_instruction_scheduler::calculate_deps() add_dep(n, last_grf_write[inst->src[i].nr + r], 0); } else { for (int r = 0; r < inst->regs_read(i); r++) { - add_dep(n, last_grf_write[inst->src[i].nr * 16 + inst->src[i].reg_offset + r], 0); + add_dep(n, last_grf_write[inst->src[i].nr * 16 + + inst->src[i].offset / REG_SIZE + r], 0); } } } else if (inst->src[i].file == FIXED_GRF) { @@ -1180,7 +1184,8 @@ fs_instruction_scheduler::calculate_deps() last_grf_write[inst->dst.nr + r] = n; } else { for (int r = 0; r < inst->regs_written; r++) { - last_grf_write[inst->dst.nr * 16 + inst->dst.reg_offset + r] = n; + last_grf_write[inst->dst.nr * 16 + + inst->dst.offset / REG_SIZE + r] = n; } } } else if (inst->dst.file == MRF) { diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp index ea39252..29435f6 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -672,7 +672,7 @@ backend_shader::backend_shader(const struct brw_compiler *compiler, bool backend_reg::equals(const backend_reg &r) const { - return brw_regs_equal(this, &r) && reg_offset == r.reg_offset; + return brw_regs_equal(this, &r) && offset == r.offset; } bool @@ -750,7 +750,9 @@ backend_reg::in_range(const backend_reg &r, unsigned n) const return (file == r.file && nr == r.nr && reg_offset >= r.reg_offset && - reg_offset < r.reg_offset + n); + reg_offset < r.reg_offset + n && + offset >= r.offset && + offset < r.offset + n * REG_SIZE); } bool diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 0102098..72b94b6 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -44,14 +44,14 @@ struct backend_reg : private brw_reg const brw_reg &as_brw_reg() const { assert(file == ARF || file == FIXED_GRF || file == MRF || file == IMM); - assert(reg_offset == 0); + assert(reg_offset == 0 && offset == 0); return static_cast<const brw_reg &>(*this); } brw_reg &as_brw_reg() { assert(file == ARF || file == FIXED_GRF || file == MRF || file == IMM); - assert(reg_offset == 0); + assert(reg_offset == 0 && offset == 0); return static_cast<brw_reg &>(*this); } @@ -75,6 +75,9 @@ struct backend_reg : private brw_reg */ uint16_t reg_offset; + /** Offset from the start of the (virtual) register in bytes. */ + uint16_t offset; + using brw_reg::type; using brw_reg::file; using brw_reg::negate; |