summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp 46
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp 14
-rw-r--r-- src/mesa/drivers/dri/i965/brw_ir_fs.h 6
3 files changed, 34 insertions, 32 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 0244f59..8f1cd61 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -818,7 +818,7 @@ fs_inst::components_read(unsigned i) const
}
int
-fs_inst::regs_read(int arg) const
+fs_inst::size_read(int arg) const
{
switch (opcode) {
case FS_OPCODE_FB_WRITE:
@@ -837,28 +837,28 @@ fs_inst::regs_read(int arg) const
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
if (arg == 0)
- return mlen;
+ return mlen * REG_SIZE;
break;
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
/* The payload is actually stored in src1 */
if (arg == 1)
- return mlen;
+ return mlen * REG_SIZE;
break;
case FS_OPCODE_LINTERP:
if (arg == 1)
- return 1;
+ return REG_SIZE;
break;
case SHADER_OPCODE_LOAD_PAYLOAD:
if (arg < this->header_size)
- return 1;
+ return REG_SIZE;
break;
case CS_OPCODE_CS_TERMINATE:
case SHADER_OPCODE_BARRIER:
- return 1;
+ return REG_SIZE;
case SHADER_OPCODE_MOV_INDIRECT:
if (arg == 0) {
@@ -867,7 +867,7 @@ fs_inst::regs_read(int arg) const
if (src[0].file == UNIFORM) {
assert(region_length % 4 == 0);
- return region_length / 4;
+ return region_length;
} else if (src[0].file == FIXED_GRF) {
/* If the start of the region is not register aligned, then
* there's some portion of the register that's technically
@@ -884,7 +884,7 @@ fs_inst::regs_read(int arg) const
if (src[0].subnr)
region_length += src[0].subnr;
- return DIV_ROUND_UP(region_length, REG_SIZE);
+ return region_length;
} else {
assert(!"Invalid register file");
}
@@ -893,22 +893,20 @@ fs_inst::regs_read(int arg) const
default:
if (is_tex() && arg == 0 && src[0].file == VGRF)
- return mlen;
+ return mlen * REG_SIZE;
break;
}
switch (src[arg].file) {
case UNIFORM:
case IMM:
- return 1;
+ return 4;
case BAD_FILE:
case ARF:
case FIXED_GRF:
case VGRF:
case ATTR:
- return DIV_ROUND_UP(components_read(arg) *
- src[arg].component_size(exec_size),
- REG_SIZE);
+ return components_read(arg) * src[arg].component_size(exec_size);
case MRF:
unreachable("MRF registers are not allowed as sources");
}
@@ -2547,7 +2545,7 @@ fs_visitor::opt_sampler_eot()
for (unsigned i = 0; i < FB_WRITE_LOGICAL_NUM_SRCS; i++) {
if (i == FB_WRITE_LOGICAL_SRC_COLOR0) {
if (!fb_write->src[i].equals(tex_inst->dst) ||
- fb_write->regs_read(i) * REG_SIZE != tex_inst->size_written)
+ fb_write->size_read(i) != tex_inst->size_written)
return false;
} else if (i != FB_WRITE_LOGICAL_SRC_COMPONENTS) {
if (fb_write->src[i].file != BAD_FILE)
@@ -2730,7 +2728,7 @@ fs_visitor::compute_to_mrf()
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
if (regions_overlap(scan_inst->dst, scan_inst->size_written,
- inst->src[0], inst->regs_read(0) * REG_SIZE)) {
+ inst->src[0], inst->size_read(0))) {
/* Found the last thing to write our reg we want to turn
* into a compute-to-MRF.
*/
@@ -2749,7 +2747,7 @@ fs_visitor::compute_to_mrf()
*/
if (scan_inst->dst.offset / REG_SIZE < inst->src[0].offset / REG_SIZE ||
scan_inst->dst.offset / REG_SIZE + DIV_ROUND_UP(scan_inst->size_written, REG_SIZE) >
- inst->src[0].offset / REG_SIZE + inst->regs_read(0))
+ inst->src[0].offset / REG_SIZE + DIV_ROUND_UP(inst->size_read(0), REG_SIZE))
break;
/* SEND instructions can't have MRF as a destination. */
@@ -2785,8 +2783,8 @@ fs_visitor::compute_to_mrf()
*/
bool interfered = false;
for (int i = 0; i < scan_inst->sources; i++) {
- if (regions_overlap(scan_inst->src[i], scan_inst->regs_read(i) * REG_SIZE,
- inst->src[0], inst->regs_read(0) * REG_SIZE)) {
+ if (regions_overlap(scan_inst->src[i], scan_inst->size_read(i),
+ inst->src[0], inst->size_read(0))) {
interfered = true;
}
}
@@ -2823,7 +2821,7 @@ fs_visitor::compute_to_mrf()
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
if (regions_overlap(scan_inst->dst, scan_inst->size_written,
- inst->src[0], inst->regs_read(0) * REG_SIZE)) {
+ inst->src[0], inst->size_read(0))) {
/* Clear the bits for any registers this instruction overwrites. */
regs_left &= ~mask_relative_to(
inst->src[0], scan_inst->dst, DIV_ROUND_UP(scan_inst->size_written,
@@ -3027,7 +3025,7 @@ fs_visitor::remove_duplicate_mrf_writes()
if (last_mrf_move[i] &&
regions_overlap(inst->dst, inst->size_written,
last_mrf_move[i]->src[0],
- last_mrf_move[i]->regs_read(0) * REG_SIZE)) {
+ last_mrf_move[i]->size_read(0))) {
last_mrf_move[i] = NULL;
}
}
@@ -4607,7 +4605,7 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
unsigned reg_count = DIV_ROUND_UP(inst->size_written, REG_SIZE);
for (unsigned i = 0; i < inst->sources; i++)
- reg_count = MAX2(reg_count, (unsigned)inst->regs_read(i));
+ reg_count = MAX2(reg_count, DIV_ROUND_UP(inst->size_read(i), REG_SIZE));
/* Calculate the maximum execution size of the instruction based on the
* factor by which it goes over the hardware limit of 2 GRFs.
@@ -4632,7 +4630,7 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
if (devinfo->gen < 8) {
for (unsigned i = 0; i < inst->sources; i++) {
if (DIV_ROUND_UP(inst->size_written, REG_SIZE) == 2 &&
- inst->regs_read(i) != 0 && inst->regs_read(i) != 2 &&
+ inst->size_read(i) != 0 && DIV_ROUND_UP(inst->size_read(i), REG_SIZE) != 2 &&
!is_uniform(inst->src[i]) &&
!(type_sz(inst->dst.type) == 4 && inst->dst.stride == 1 &&
type_sz(inst->src[i].type) == 2 && inst->src[i].stride == 1)) {
@@ -5114,7 +5112,7 @@ needs_dst_copy(const fs_builder &lbld, const fs_inst *inst)
* the data read from the same source by other lowered instructions.
*/
if (regions_overlap(inst->dst, inst->size_written,
- inst->src[i], inst->regs_read(i) * REG_SIZE) &&
+ inst->src[i], inst->size_read(i)) &&
!inst->dst.equals(inst->src[i]))
return true;
}
@@ -5371,7 +5369,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
switch (inst->src[i].file) {
case VGRF:
fprintf(file, "vgrf%d", inst->src[i].nr);
- if (alloc.sizes[inst->src[i].nr] != (unsigned)inst->regs_read(i) ||
+ if (alloc.sizes[inst->src[i].nr] * REG_SIZE != inst->size_read(i) ||
inst->src[i].offset % REG_SIZE != 0)
fprintf(file, "+%d.%d", inst->src[i].offset / REG_SIZE,
inst->src[i].offset % REG_SIZE);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 0e239d2..f8238aa 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -44,7 +44,7 @@ struct acp_entry : public exec_node {
fs_reg dst;
fs_reg src;
uint8_t size_written;
- uint8_t regs_read;
+ uint8_t size_read;
enum opcode opcode;
bool saturate;
};
@@ -367,7 +367,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
/* Bail if inst is reading a range that isn't contained in the range
* that entry is writing.
*/
- if (!region_contained_in(inst->src[arg], inst->regs_read(arg),
+ if (!region_contained_in(inst->src[arg], DIV_ROUND_UP(inst->size_read(arg),
+ REG_SIZE),
entry->dst, DIV_ROUND_UP(entry->size_written,
REG_SIZE)))
return false;
@@ -524,7 +525,8 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
/* Bail if inst is reading a range that isn't contained in the range
* that entry is writing.
*/
- if (!region_contained_in(inst->src[i], inst->regs_read(i),
+ if (!region_contained_in(inst->src[i], DIV_ROUND_UP(inst->size_read(i),
+ REG_SIZE),
entry->dst, DIV_ROUND_UP(entry->size_written,
REG_SIZE)))
continue;
@@ -785,7 +787,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
/* Make sure we kill the entry if this instruction overwrites
* _any_ of the registers that it reads
*/
- if (regions_overlap(entry->src, entry->regs_read * REG_SIZE,
+ if (regions_overlap(entry->src, entry->size_read,
inst->dst, inst->size_written))
entry->remove();
}
@@ -800,7 +802,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
entry->dst = inst->dst;
entry->src = inst->src[0];
entry->size_written = inst->size_written;
- entry->regs_read = inst->regs_read(0);
+ entry->size_read = inst->size_read(0);
entry->opcode = inst->opcode;
entry->saturate = inst->saturate;
acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry);
@@ -818,7 +820,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
entry->dst.offset += offset * REG_SIZE;
entry->src = inst->src[i];
entry->size_written = size_written;
- entry->regs_read = inst->regs_read(i);
+ entry->size_read = inst->size_read(i);
entry->opcode = inst->opcode;
if (!entry->dst.equals(inst->src[i])) {
acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry);
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index cea81e4..2e5c8e5 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -324,7 +324,7 @@ public:
bool is_partial_write() const;
bool is_copy_payload(const brw::simple_allocator &grf_alloc) const;
unsigned components_read(unsigned i) const;
- int regs_read(int arg) const;
+ int size_read(int arg) const;
bool can_do_source_mods(const struct gen_device_info *devinfo);
bool can_change_types() const;
bool has_side_effects() const;
@@ -435,7 +435,9 @@ inline unsigned
regs_read(const fs_inst *inst, unsigned i)
{
/* XXX - Take into account register-misaligned offsets correctly. */
- return inst->regs_read(i);
+ const unsigned reg_size =
+ inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 4 : REG_SIZE;
+ return DIV_ROUND_UP(inst->size_read(i), reg_size);
}
#endif