summary | refs | log | tree | commit | diff | stats
path: root/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 36
1 file changed, 33 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 5cf019c..cdb7b80 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -279,7 +279,9 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
if (entry->src.file == IMM)
return false;
- if (inst->regs_read(this, arg) > 1)
+ /* Bail if inst is reading more than entry is writing. */
+ if ((inst->regs_read(this, arg) * inst->src[arg].stride *
+ type_sz(inst->src[arg].type)) > type_sz(entry->dst.type))
return false;
if (inst->src[arg].file != entry->dst.file ||
@@ -298,7 +300,32 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
bool has_source_modifiers = entry->src.abs || entry->src.negate;
if ((has_source_modifiers || entry->src.file == UNIFORM ||
- entry->src.smear != -1) && !can_do_source_mods(inst))
+ entry->src.smear != -1 || !entry->src.is_contiguous()) &&
+ !can_do_source_mods(inst))
+ return false;
+
+ /* Bail if the result of composing both strides would exceed the
+ * hardware limit.
+ */
+ if (entry->src.stride * inst->src[arg].stride > 4)
+ return false;
+
+ /* Bail if the result of composing both strides cannot be expressed
+ * as another stride. This avoids, for example, trying to transform
+ * this:
+ *
+ * MOV (8) rX<1>UD rY<0;1,0>UD
+ * FOO (8) ... rX<8;8,1>UW
+ *
+ * into this:
+ *
+ * FOO (8) ... rY<0;1,0>UW
+ *
+ * Which would have different semantics.
+ */
+ if (entry->src.stride != 1 &&
+ (inst->src[arg].stride *
+ type_sz(inst->src[arg].type)) % type_sz(entry->src.type) != 0)
return false;
if (has_source_modifiers && entry->dst.type != inst->src[arg].type)
@@ -310,6 +337,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
if (entry->src.smear != -1)
inst->src[arg].smear = entry->src.smear;
inst->src[arg].subreg_offset = entry->src.subreg_offset;
+ inst->src[arg].stride *= entry->src.stride;
if (!inst->src[arg].abs) {
inst->src[arg].abs = entry->src.abs;
@@ -332,7 +360,9 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
if (inst->src[i].file != entry->dst.file ||
inst->src[i].reg != entry->dst.reg ||
inst->src[i].reg_offset != entry->dst.reg_offset ||
- inst->src[i].subreg_offset != entry->dst.subreg_offset)
+ inst->src[i].subreg_offset != entry->dst.subreg_offset ||
+ inst->src[i].type != entry->dst.type ||
+ inst->src[i].stride > 1)
continue;
/* Don't bother with cases that should have been taken care of by the