summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
diff options
context:
space:
mode:
authorIago Toral Quiroga <itoral@igalia.com>2016-03-11 14:35:07 +0100
committerSamuel Iglesias Gonsálvez <siglesias@igalia.com>2016-05-16 09:55:32 +0200
commitcf375a3333e54a01462f192202d609436e5fbec8 (patch)
treea43182aa9bfeada61952bc47b6b5f82402939bbd /src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
parentea1ef49a16dc429c50ece388e92bf206ccf282a7 (diff)
downloadexternal_mesa3d-cf375a3333e54a01462f192202d609436e5fbec8.zip
external_mesa3d-cf375a3333e54a01462f192202d609436e5fbec8.tar.gz
external_mesa3d-cf375a3333e54a01462f192202d609436e5fbec8.tar.bz2
i965/fs: fix copy propagation of partially invalidated entries
We were not invalidating entries with a src that reads more than one register when we find writes that overwrite any register read by entry->src after the first. This leads to incorrect copy propagation because we re-use entries from the ACP that have been partially invalidated. Same thing for entries with a dst that writes to more than one register. v2 (Sam): - Improve code by defining regions_overlap() and using it instead of a loop (Curro). Reviewed-by: Francisco Jerez <currojerez@riseup.net> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp')
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp35
1 files changed, 27 insertions, 8 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 23c545b..4b73b07 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -44,6 +44,7 @@ struct acp_entry : public exec_node {
fs_reg dst;
fs_reg src;
uint8_t regs_written;
+ uint8_t regs_read;
enum opcode opcode;
bool saturate;
};
@@ -702,6 +703,14 @@ can_propagate_from(fs_inst *inst)
!inst->is_partial_write());
}
+inline bool
+regions_overlap(const fs_reg &r, unsigned n, const fs_reg &s, unsigned m)
+{
+ return r.file == s.file && r.nr == s.nr &&
+ !(r.reg_offset + n < s.reg_offset ||
+ s.reg_offset + m < r.reg_offset);
+}
+
/* Walks a basic block and does copy propagation on it using the acp
* list.
*/
@@ -728,18 +737,26 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
/* kill the destination from the ACP */
if (inst->dst.file == VGRF) {
foreach_in_list_safe(acp_entry, entry, &acp[inst->dst.nr % ACP_HASH_SIZE]) {
- if (inst->overwrites_reg(entry->dst)) {
- entry->remove();
- }
- }
+ if (regions_overlap(entry->dst, entry->regs_written,
+ inst->dst, inst->regs_written)) {
+ entry->remove();
+ break;
+ }
+ }
/* Oops, we only have the chaining hash based on the destination, not
* the source, so walk across the entire table.
*/
for (int i = 0; i < ACP_HASH_SIZE; i++) {
foreach_in_list_safe(acp_entry, entry, &acp[i]) {
- if (inst->overwrites_reg(entry->src))
+ /* Make sure we kill the entry if this instruction overwrites
+ * _any_ of the registers that it reads
+ */
+ if (regions_overlap(entry->src, entry->regs_read,
+ inst->dst, inst->regs_written)) {
entry->remove();
+ continue;
+ }
}
}
}
@@ -748,10 +765,11 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
* operand of another instruction, add it to the ACP.
*/
if (can_propagate_from(inst)) {
- acp_entry *entry = ralloc(copy_prop_ctx, acp_entry);
- entry->dst = inst->dst;
- entry->src = inst->src[0];
+ acp_entry *entry = ralloc(copy_prop_ctx, acp_entry);
+ entry->dst = inst->dst;
+ entry->src = inst->src[0];
entry->regs_written = inst->regs_written;
+ entry->regs_read = inst->regs_read(0);
entry->opcode = inst->opcode;
entry->saturate = inst->saturate;
acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry);
@@ -769,6 +787,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
entry->dst.reg_offset = offset;
entry->src = inst->src[i];
entry->regs_written = regs_written;
+ entry->regs_read = inst->regs_read(i);
entry->opcode = inst->opcode;
if (!entry->dst.equals(inst->src[i])) {
acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry);