summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp 56
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp 16
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_cse.cpp 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_generator.cpp 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp 4
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_live_variables.h 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_nir.cpp 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp 20
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp 16
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_validate.cpp 4
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 12
-rw-r--r-- src/mesa/drivers/dri/i965/brw_ir_fs.h 6
-rw-r--r-- src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp 15
-rw-r--r-- src/mesa/drivers/dri/i965/brw_shader.cpp 6
-rw-r--r-- src/mesa/drivers/dri/i965/brw_shader.h 7
17 files changed, 94 insertions, 82 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 75642d3..8401798 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -354,7 +354,7 @@ fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const
return false;
fs_reg reg = this->src[0];
- if (reg.file != VGRF || reg.reg_offset != 0 || reg.stride == 0)
+ if (reg.file != VGRF || reg.offset / REG_SIZE != 0 || reg.stride == 0)
return false;
if (grf_alloc.sizes[reg.nr] != this->regs_written)
@@ -366,7 +366,7 @@ fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const
return false;
if (i < this->header_size) {
- reg.reg_offset += 1;
+ reg.offset += REG_SIZE;
} else {
reg = horiz_offset(reg, this->exec_size);
}
@@ -425,7 +425,7 @@ fs_reg::fs_reg()
fs_reg::fs_reg(struct ::brw_reg reg) :
backend_reg(reg)
{
- this->reg_offset = 0;
+ this->offset = 0;
this->subreg_offset = 0;
this->stride = 1;
if (this->file == IMM &&
@@ -1463,7 +1463,7 @@ fs_visitor::assign_curb_setup()
foreach_block_and_inst(block, fs_inst, inst, cfg) {
for (unsigned int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == UNIFORM) {
- int uniform_nr = inst->src[i].nr + inst->src[i].reg_offset;
+ int uniform_nr = inst->src[i].nr + inst->src[i].offset / 4;
int constant_nr;
if (uniform_nr >= 0 && uniform_nr < (int) uniforms) {
constant_nr = push_constant_loc[uniform_nr];
@@ -1620,7 +1620,7 @@ fs_visitor::convert_attr_sources_to_hw_regs(fs_inst *inst)
int grf = payload.num_regs +
prog_data->curb_read_length +
inst->src[i].nr +
- inst->src[i].reg_offset;
+ inst->src[i].offset / REG_SIZE;
/* As explained at brw_reg_from_fs_reg, From the Haswell PRM:
*
@@ -1773,13 +1773,13 @@ fs_visitor::split_virtual_grfs()
foreach_block_and_inst(block, fs_inst, inst, cfg) {
if (inst->dst.file == VGRF) {
- int reg = vgrf_to_reg[inst->dst.nr] + inst->dst.reg_offset;
+ int reg = vgrf_to_reg[inst->dst.nr] + inst->dst.offset / REG_SIZE;
for (int j = 1; j < inst->regs_written; j++)
split_points[reg + j] = false;
}
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == VGRF) {
- int reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].reg_offset;
+ int reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].offset / REG_SIZE;
for (int j = 1; j < inst->regs_read(i); j++)
split_points[reg + j] = false;
}
@@ -1826,16 +1826,18 @@ fs_visitor::split_virtual_grfs()
foreach_block_and_inst(block, fs_inst, inst, cfg) {
if (inst->dst.file == VGRF) {
- reg = vgrf_to_reg[inst->dst.nr] + inst->dst.reg_offset;
+ reg = vgrf_to_reg[inst->dst.nr] + inst->dst.offset / REG_SIZE;
inst->dst.nr = new_virtual_grf[reg];
- inst->dst.reg_offset = new_reg_offset[reg];
+ inst->dst.offset = new_reg_offset[reg] * REG_SIZE +
+ inst->dst.offset % REG_SIZE;
assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]);
}
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == VGRF) {
- reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].reg_offset;
+ reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].offset / REG_SIZE;
inst->src[i].nr = new_virtual_grf[reg];
- inst->src[i].reg_offset = new_reg_offset[reg];
+ inst->src[i].offset = new_reg_offset[reg] * REG_SIZE +
+ inst->src[i].offset % REG_SIZE;
assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]);
}
}
@@ -2001,7 +2003,7 @@ fs_visitor::assign_constant_locations()
if (inst->src[i].file != UNIFORM)
continue;
- int constant_nr = inst->src[i].nr + inst->src[i].reg_offset;
+ int constant_nr = inst->src[i].nr + inst->src[i].offset / 4;
if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) {
assert(inst->src[2].ud % 4 == 0);
@@ -2155,7 +2157,7 @@ fs_visitor::lower_constant_loads()
if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0)
continue;
- unsigned location = inst->src[i].nr + inst->src[i].reg_offset;
+ unsigned location = inst->src[i].nr + inst->src[i].offset / 4;
if (location >= uniforms)
continue; /* Out of bounds access */
@@ -2182,7 +2184,7 @@ fs_visitor::lower_constant_loads()
/* Rewrite the instruction to use the temporary VGRF. */
inst->src[i].file = VGRF;
inst->src[i].nr = dst.nr;
- inst->src[i].reg_offset = 0;
+ inst->src[i].offset %= 4;
inst->src[i].set_smear((pull_index & 3) * 4 /
type_sz(inst->src[i].type));
@@ -2192,7 +2194,7 @@ fs_visitor::lower_constant_loads()
if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT &&
inst->src[0].file == UNIFORM) {
- unsigned location = inst->src[0].nr + inst->src[0].reg_offset;
+ unsigned location = inst->src[0].nr + inst->src[0].offset / 4;
if (location >= uniforms)
continue; /* Out of bounds access */
@@ -2748,9 +2750,9 @@ fs_visitor::compute_to_mrf()
* would need us to understand coalescing out more than one MOV at
* a time.
*/
- if (scan_inst->dst.reg_offset < inst->src[0].reg_offset ||
- scan_inst->dst.reg_offset + scan_inst->regs_written >
- inst->src[0].reg_offset + inst->regs_read(0))
+ if (scan_inst->dst.offset / REG_SIZE < inst->src[0].offset / REG_SIZE ||
+ scan_inst->dst.offset / REG_SIZE + scan_inst->regs_written >
+ inst->src[0].offset / REG_SIZE + inst->regs_read(0))
break;
/* SEND instructions can't have MRF as a destination. */
@@ -2852,7 +2854,7 @@ fs_visitor::compute_to_mrf()
}
scan_inst->dst.file = MRF;
- scan_inst->dst.reg_offset = 0;
+ scan_inst->dst.offset %= REG_SIZE;
scan_inst->saturate |= inst->saturate;
if (!regs_left)
break;
@@ -3264,7 +3266,7 @@ fs_visitor::lower_uniform_pull_constant_loads()
* mode. Reserve space for the register.
*/
offset = payload = fs_reg(VGRF, alloc.allocate(2));
- offset.reg_offset++;
+ offset.offset += REG_SIZE;
inst->mlen = 2;
} else {
offset = payload = fs_reg(VGRF, alloc.allocate(1));
@@ -5317,7 +5319,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
if (alloc.sizes[inst->dst.nr] != inst->regs_written ||
inst->dst.subreg_offset)
fprintf(file, "+%d.%d",
- inst->dst.reg_offset, inst->dst.subreg_offset);
+ inst->dst.offset / REG_SIZE, inst->dst.subreg_offset);
break;
case FIXED_GRF:
fprintf(file, "g%d", inst->dst.nr);
@@ -5329,10 +5331,10 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
fprintf(file, "(null)");
break;
case UNIFORM:
- fprintf(file, "***u%d***", inst->dst.nr + inst->dst.reg_offset);
+ fprintf(file, "***u%d***", inst->dst.nr + inst->dst.offset / 4);
break;
case ATTR:
- fprintf(file, "***attr%d***", inst->dst.nr + inst->dst.reg_offset);
+ fprintf(file, "***attr%d***", inst->dst.nr + inst->dst.offset / REG_SIZE);
break;
case ARF:
switch (inst->dst.nr) {
@@ -5372,7 +5374,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
fprintf(file, "vgrf%d", inst->src[i].nr);
if (alloc.sizes[inst->src[i].nr] != (unsigned)inst->regs_read(i) ||
inst->src[i].subreg_offset)
- fprintf(file, "+%d.%d", inst->src[i].reg_offset,
+ fprintf(file, "+%d.%d", inst->src[i].offset / REG_SIZE,
inst->src[i].subreg_offset);
break;
case FIXED_GRF:
@@ -5382,12 +5384,12 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
fprintf(file, "***m%d***", inst->src[i].nr);
break;
case ATTR:
- fprintf(file, "attr%d+%d", inst->src[i].nr, inst->src[i].reg_offset);
+ fprintf(file, "attr%d+%d", inst->src[i].nr, inst->src[i].offset / REG_SIZE);
break;
case UNIFORM:
- fprintf(file, "u%d", inst->src[i].nr + inst->src[i].reg_offset);
+ fprintf(file, "u%d", inst->src[i].nr + inst->src[i].offset / 4);
if (inst->src[i].subreg_offset) {
- fprintf(file, "+%d.%d", inst->src[i].reg_offset,
+ fprintf(file, "+%d.%d", inst->src[i].offset / 4,
inst->src[i].subreg_offset);
}
break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
index 3b123a7..7607d20 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
@@ -90,7 +90,8 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block)
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
if (scan_inst->overwrites_reg(inst->src[0])) {
if (scan_inst->is_partial_write() ||
- scan_inst->dst.reg_offset != inst->src[0].reg_offset ||
+ scan_inst->dst.offset / REG_SIZE !=
+ inst->src[0].offset / REG_SIZE ||
scan_inst->exec_size != inst->exec_size)
break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 518827b..09d0a4e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -341,9 +341,8 @@ region_contained_in(const fs_reg &src, unsigned regs_read,
const fs_reg &dst, unsigned regs_written)
{
return src.file == dst.file && src.nr == dst.nr &&
- (src.reg_offset * REG_SIZE + src.subreg_offset >=
- dst.reg_offset * REG_SIZE + dst.subreg_offset) &&
- src.reg_offset + regs_read <= dst.reg_offset + regs_written;
+ (src.offset + src.subreg_offset >= dst.offset + dst.subreg_offset) &&
+ src.offset / REG_SIZE + regs_read <= dst.offset / REG_SIZE + regs_written;
}
bool
@@ -462,8 +461,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
inst->saturate = inst->saturate || entry->saturate;
/* Compute the offset of inst->src[arg] relative to entry->dst */
- const unsigned rel_offset = (inst->src[arg].reg_offset
- - entry->dst.reg_offset) * REG_SIZE +
+ const unsigned rel_offset = inst->src[arg].offset - entry->dst.offset +
inst->src[arg].subreg_offset;
/* Compute the first component of the copy that the instruction is
@@ -484,8 +482,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
*/
const unsigned offset = suboffset +
component * entry->src.stride * type_sz(entry->src.type) +
- entry->src.reg_offset * reg_size + entry->src.subreg_offset;
- inst->src[arg].reg_offset = offset / reg_size;
+ entry->src.offset + entry->src.subreg_offset;
+ inst->src[arg].offset = ROUND_DOWN_TO(offset, reg_size);
inst->src[arg].subreg_offset = offset % reg_size;
if (has_source_modifiers) {
@@ -747,7 +745,7 @@ can_propagate_from(fs_inst *inst)
inst->dst.file == VGRF &&
((inst->src[0].file == VGRF &&
(inst->src[0].nr != inst->dst.nr ||
- inst->src[0].reg_offset != inst->dst.reg_offset)) ||
+ inst->src[0].offset / REG_SIZE != inst->dst.offset / REG_SIZE)) ||
inst->src[0].file == ATTR ||
inst->src[0].file == UNIFORM ||
inst->src[0].file == IMM) &&
@@ -824,7 +822,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
if (inst->src[i].file == VGRF) {
acp_entry *entry = ralloc(copy_prop_ctx, acp_entry);
entry->dst = inst->dst;
- entry->dst.reg_offset += offset;
+ entry->dst.offset += offset * REG_SIZE;
entry->src = inst->src[i];
entry->regs_written = regs_written;
entry->regs_read = inst->regs_read(i);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 0c769dd..0c65c5b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -221,7 +221,7 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate)
payload = ralloc_array(bld.shader->mem_ctx, fs_reg, sources);
for (int i = 0; i < header_size; i++) {
payload[i] = src;
- src.reg_offset++;
+ src.offset += REG_SIZE;
}
for (int i = header_size; i < sources; i++) {
payload[i] = src;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 2f4ba7b..2a45c37 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -56,7 +56,7 @@ brw_file_from_reg(fs_reg *reg)
static struct brw_reg
brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen, bool compressed)
{
- assert(reg->reg_offset == 0);
+ assert(reg->offset / REG_SIZE == 0);
struct brw_reg brw_reg;
switch (reg->file) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index 8bd4229..02dc777 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -120,7 +120,7 @@ fs_live_variables::setup_def_use()
for (int j = 0; j < inst->regs_read(i); j++) {
setup_one_read(bd, inst, ip, reg);
- reg.reg_offset++;
+ reg.offset += REG_SIZE;
}
}
@@ -131,7 +131,7 @@ fs_live_variables::setup_def_use()
fs_reg reg = inst->dst;
for (int j = 0; j < inst->regs_written; j++) {
setup_one_write(bd, inst, ip, reg);
- reg.reg_offset++;
+ reg.offset += REG_SIZE;
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.h b/src/mesa/drivers/dri/i965/brw_fs_live_variables.h
index 96cadea..91d1e42 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.h
@@ -68,7 +68,7 @@ public:
bool vars_interfere(int a, int b);
int var_from_reg(const fs_reg &reg) const
{
- return var_from_vgrf[reg.nr] + reg.reg_offset;
+ return var_from_vgrf[reg.nr] + reg.offset / REG_SIZE;
}
/** Map from virtual GRF number to index in block_data arrays. */
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index eae726d..a0d3187 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -3999,7 +3999,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
if (const_offset) {
/* Offsets are in bytes but they should always be multiples of 4 */
assert(const_offset->u32[0] % 4 == 0);
- src.reg_offset = const_offset->u32[0] / 4;
+ src.offset = const_offset->u32[0] + src.offset % 4;
for (unsigned j = 0; j < instr->num_components; j++) {
bld.MOV(offset(dest, bld, j), offset(src, bld, j));
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 5576163..82adaa3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -36,8 +36,8 @@ static void
assign_reg(unsigned *reg_hw_locations, fs_reg *reg)
{
if (reg->file == VGRF) {
- reg->nr = reg_hw_locations[reg->nr] + reg->reg_offset;
- reg->reg_offset = 0;
+ reg->nr = reg_hw_locations[reg->nr] + reg->offset / REG_SIZE;
+ reg->offset %= REG_SIZE;
}
}
@@ -780,7 +780,7 @@ emit_unspill(const fs_builder &bld, fs_reg dst,
unspill_inst->mlen = 1; /* header contains offset */
}
- dst.reg_offset += reg_size;
+ dst.offset += reg_size * REG_SIZE;
spill_offset += reg_size * REG_SIZE;
}
}
@@ -796,7 +796,7 @@ emit_spill(const fs_builder &bld, fs_reg src,
for (unsigned i = 0; i < count / reg_size; i++) {
fs_inst *spill_inst =
bld.emit(SHADER_OPCODE_GEN4_SCRATCH_WRITE, bld.null_reg_f(), src);
- src.reg_offset += reg_size;
+ src.offset += reg_size * REG_SIZE;
spill_inst->offset = spill_offset + i * reg_size * REG_SIZE;
spill_inst->mlen = 1 + reg_size; /* header, value */
spill_inst->base_mrf = spill_base_mrf(bld.shader);
@@ -904,12 +904,12 @@ fs_visitor::spill_reg(int spill_reg)
if (inst->src[i].file == VGRF &&
inst->src[i].nr == spill_reg) {
int regs_read = inst->regs_read(i);
- int subset_spill_offset = (spill_offset +
- REG_SIZE * inst->src[i].reg_offset);
+ int subset_spill_offset = spill_offset +
+ ROUND_DOWN_TO(inst->src[i].offset, REG_SIZE);
fs_reg unspill_dst(VGRF, alloc.allocate(regs_read));
inst->src[i].nr = unspill_dst.nr;
- inst->src[i].reg_offset = 0;
+ inst->src[i].offset %= REG_SIZE;
/* We read the largest power-of-two divisor of the register count
* (because only POT scratch read blocks are allowed by the
@@ -932,12 +932,12 @@ fs_visitor::spill_reg(int spill_reg)
if (inst->dst.file == VGRF &&
inst->dst.nr == spill_reg) {
- int subset_spill_offset = (spill_offset +
- REG_SIZE * inst->dst.reg_offset);
+ int subset_spill_offset = spill_offset +
+ ROUND_DOWN_TO(inst->dst.offset, REG_SIZE);
fs_reg spill_src(VGRF, alloc.allocate(inst->regs_written));
inst->dst.nr = spill_src.nr;
- inst->dst.reg_offset = 0;
+ inst->dst.offset %= REG_SIZE;
/* If we're immediately spilling the register, we should not use
* destination dependency hints. Doing so will cause the GPU do
diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
index 4578ad5..651c136 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
@@ -50,7 +50,7 @@ is_nop_mov(const fs_inst *inst)
if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
fs_reg dst = inst->dst;
for (int i = 0; i < inst->sources; i++) {
- dst.reg_offset = i;
+ dst.offset = i * REG_SIZE + dst.offset % REG_SIZE;
if (!dst.equals(inst->src[i])) {
return false;
}
@@ -192,7 +192,7 @@ fs_visitor::register_coalesce()
mov[0] = inst;
channels_remaining -= inst->regs_written;
} else {
- const int offset = inst->src[0].reg_offset;
+ const int offset = inst->src[0].offset / REG_SIZE;
if (mov[offset]) {
/* This is the second time that this offset in the register has
* been set. This means, in particular, that inst->dst was
@@ -203,9 +203,9 @@ fs_visitor::register_coalesce()
channels_remaining = -1;
continue;
}
- dst_reg_offset[offset] = inst->dst.reg_offset;
+ dst_reg_offset[offset] = inst->dst.offset / REG_SIZE;
if (inst->regs_written > 1)
- dst_reg_offset[offset + 1] = inst->dst.reg_offset + 1;
+ dst_reg_offset[offset + 1] = inst->dst.offset / REG_SIZE + 1;
mov[offset] = inst;
channels_remaining -= inst->regs_written;
}
@@ -253,16 +253,16 @@ fs_visitor::register_coalesce()
if (scan_inst->dst.file == VGRF &&
scan_inst->dst.nr == src_reg) {
scan_inst->dst.nr = dst_reg;
- scan_inst->dst.reg_offset =
- dst_reg_offset[scan_inst->dst.reg_offset];
+ scan_inst->dst.offset = scan_inst->dst.offset % REG_SIZE +
+ dst_reg_offset[scan_inst->dst.offset / REG_SIZE] * REG_SIZE;
}
for (int j = 0; j < scan_inst->sources; j++) {
if (scan_inst->src[j].file == VGRF &&
scan_inst->src[j].nr == src_reg) {
scan_inst->src[j].nr = dst_reg;
- scan_inst->src[j].reg_offset =
- dst_reg_offset[scan_inst->src[j].reg_offset];
+ scan_inst->src[j].offset = scan_inst->src[j].offset % REG_SIZE +
+ dst_reg_offset[scan_inst->src[j].offset / REG_SIZE] * REG_SIZE;
}
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
index f59fdbd..60bb1c0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
@@ -116,7 +116,8 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
for (int i = 0; i < scan_inst->sources; i++) {
if (scan_inst->src[i].file == VGRF &&
scan_inst->src[i].nr == inst->src[0].nr &&
- scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
+ scan_inst->src[i].offset / REG_SIZE ==
+ inst->src[0].offset / REG_SIZE) {
if (scan_inst->opcode != BRW_OPCODE_MOV ||
!scan_inst->saturate ||
scan_inst->src[0].abs ||
diff --git a/src/mesa/drivers/dri/i965/brw_fs_validate.cpp b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
index 90edd02..10ad7c3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
@@ -43,13 +43,13 @@ fs_visitor::validate()
{
foreach_block_and_inst (block, fs_inst, inst, cfg) {
if (inst->dst.file == VGRF) {
- fsv_assert(inst->dst.reg_offset + inst->regs_written <=
+ fsv_assert(inst->dst.offset / REG_SIZE + inst->regs_written <=
alloc.sizes[inst->dst.nr]);
}
for (unsigned i = 0; i < inst->sources; i++) {
if (inst->src[i].file == VGRF) {
- fsv_assert(inst->src[i].reg_offset + inst->regs_read(i) <=
+ fsv_assert(inst->src[i].offset / REG_SIZE + inst->regs_read(i) <=
(int)alloc.sizes[inst->src[i].nr]);
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 48b5f40..d0f504c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -43,21 +43,21 @@ fs_visitor::emit_vs_system_value(int location)
switch (location) {
case SYSTEM_VALUE_BASE_VERTEX:
- reg->reg_offset = 0;
+ reg->offset = 0;
vs_prog_data->uses_basevertex = true;
break;
case SYSTEM_VALUE_BASE_INSTANCE:
- reg->reg_offset = 1;
+ reg->offset = REG_SIZE;
vs_prog_data->uses_baseinstance = true;
break;
case SYSTEM_VALUE_VERTEX_ID:
unreachable("should have been lowered");
case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
- reg->reg_offset = 2;
+ reg->offset = 2 * REG_SIZE;
vs_prog_data->uses_vertexid = true;
break;
case SYSTEM_VALUE_INSTANCE_ID:
- reg->reg_offset = 3;
+ reg->offset = 3 * REG_SIZE;
vs_prog_data->uses_instanceid = true;
break;
case SYSTEM_VALUE_DRAW_ID:
@@ -67,7 +67,7 @@ fs_visitor::emit_vs_system_value(int location)
BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID)))
reg->nr += 4;
- reg->reg_offset = 0;
+ reg->offset = 0;
vs_prog_data->uses_drawid = true;
break;
default:
@@ -574,7 +574,7 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
for (int i = 0; i < key->nr_userclip_plane_consts; i++) {
fs_reg u = userplane[i];
fs_reg output = outputs[VARYING_SLOT_CLIP_DIST0 + i / 4];
- output.reg_offset = i & 3;
+ output.offset = output.offset % REG_SIZE + (i & 3) * REG_SIZE;
abld.MUL(output, outputs[clip_vertex], u);
for (int j = 1; j < 4; j++) {
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index dd5f82a..10da31e 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -88,7 +88,7 @@ byte_offset(fs_reg reg, unsigned delta)
case UNIFORM: {
const unsigned reg_size = (reg.file == UNIFORM ? 4 : REG_SIZE);
const unsigned suboffset = reg.subreg_offset + delta;
- reg.reg_offset += suboffset / reg_size;
+ reg.offset += ROUND_DOWN_TO(suboffset, reg_size);
reg.subreg_offset = suboffset % reg_size;
break;
}
@@ -192,8 +192,8 @@ reg_space(const fs_reg &r)
static inline unsigned
reg_offset(const fs_reg &r)
{
- return ((r.file == VGRF || r.file == IMM ? 0 : r.nr) + r.reg_offset) *
- (r.file == UNIFORM ? 4 : REG_SIZE) + r.subreg_offset;
+ return (r.file == VGRF || r.file == IMM ? 0 : r.nr) *
+ (r.file == UNIFORM ? 4 : REG_SIZE) + r.offset + r.subreg_offset;
}
/**
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index ff733c8..dde7554 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -1008,7 +1008,8 @@ fs_instruction_scheduler::calculate_deps()
add_dep(last_grf_write[inst->src[i].nr + r], n);
} else {
for (int r = 0; r < inst->regs_read(i); r++) {
- add_dep(last_grf_write[inst->src[i].nr * 16 + inst->src[i].reg_offset + r], n);
+ add_dep(last_grf_write[inst->src[i].nr * 16 +
+ inst->src[i].offset / REG_SIZE + r], n);
}
}
} else if (inst->src[i].file == FIXED_GRF) {
@@ -1057,8 +1058,10 @@ fs_instruction_scheduler::calculate_deps()
}
} else {
for (int r = 0; r < inst->regs_written; r++) {
- add_dep(last_grf_write[inst->dst.nr * 16 + inst->dst.reg_offset + r], n);
- last_grf_write[inst->dst.nr * 16 + inst->dst.reg_offset + r] = n;
+ add_dep(last_grf_write[inst->dst.nr * 16 +
+ inst->dst.offset / REG_SIZE + r], n);
+ last_grf_write[inst->dst.nr * 16 +
+ inst->dst.offset / REG_SIZE + r] = n;
}
}
} else if (inst->dst.file == MRF) {
@@ -1131,7 +1134,8 @@ fs_instruction_scheduler::calculate_deps()
add_dep(n, last_grf_write[inst->src[i].nr + r], 0);
} else {
for (int r = 0; r < inst->regs_read(i); r++) {
- add_dep(n, last_grf_write[inst->src[i].nr * 16 + inst->src[i].reg_offset + r], 0);
+ add_dep(n, last_grf_write[inst->src[i].nr * 16 +
+ inst->src[i].offset / REG_SIZE + r], 0);
}
}
} else if (inst->src[i].file == FIXED_GRF) {
@@ -1180,7 +1184,8 @@ fs_instruction_scheduler::calculate_deps()
last_grf_write[inst->dst.nr + r] = n;
} else {
for (int r = 0; r < inst->regs_written; r++) {
- last_grf_write[inst->dst.nr * 16 + inst->dst.reg_offset + r] = n;
+ last_grf_write[inst->dst.nr * 16 +
+ inst->dst.offset / REG_SIZE + r] = n;
}
}
} else if (inst->dst.file == MRF) {
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index ea39252..29435f6 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -672,7 +672,7 @@ backend_shader::backend_shader(const struct brw_compiler *compiler,
bool
backend_reg::equals(const backend_reg &r) const
{
- return brw_regs_equal(this, &r) && reg_offset == r.reg_offset;
+ return brw_regs_equal(this, &r) && offset == r.offset;
}
bool
@@ -750,7 +750,9 @@ backend_reg::in_range(const backend_reg &r, unsigned n) const
return (file == r.file &&
nr == r.nr &&
reg_offset >= r.reg_offset &&
- reg_offset < r.reg_offset + n);
+ reg_offset < r.reg_offset + n &&
+ offset >= r.offset &&
+ offset < r.offset + n * REG_SIZE);
}
bool
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 0102098..72b94b6 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -44,14 +44,14 @@ struct backend_reg : private brw_reg
const brw_reg &as_brw_reg() const
{
assert(file == ARF || file == FIXED_GRF || file == MRF || file == IMM);
- assert(reg_offset == 0);
+ assert(reg_offset == 0 && offset == 0);
return static_cast<const brw_reg &>(*this);
}
brw_reg &as_brw_reg()
{
assert(file == ARF || file == FIXED_GRF || file == MRF || file == IMM);
- assert(reg_offset == 0);
+ assert(reg_offset == 0 && offset == 0);
return static_cast<brw_reg &>(*this);
}
@@ -75,6 +75,9 @@ struct backend_reg : private brw_reg
*/
uint16_t reg_offset;
+ /** Offset from the start of the (virtual) register in bytes. */
+ uint16_t offset;
+
using brw_reg::type;
using brw_reg::file;
using brw_reg::negate;