diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 36 |
1 files changed, 33 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 5cf019c..cdb7b80 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -279,7 +279,9 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
    if (entry->src.file == IMM)
       return false;
 
-   if (inst->regs_read(this, arg) > 1)
+   /* Bail if inst is reading more than entry is writing. */
+   if ((inst->regs_read(this, arg) * inst->src[arg].stride *
+        type_sz(inst->src[arg].type)) > type_sz(entry->dst.type))
       return false;
 
    if (inst->src[arg].file != entry->dst.file ||
@@ -298,7 +300,32 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
    bool has_source_modifiers = entry->src.abs || entry->src.negate;
 
    if ((has_source_modifiers || entry->src.file == UNIFORM ||
-        entry->src.smear != -1) && !can_do_source_mods(inst))
+        entry->src.smear != -1 || !entry->src.is_contiguous()) &&
+       !can_do_source_mods(inst))
+      return false;
+
+   /* Bail if the result of composing both strides would exceed the
+    * hardware limit.
+    */
+   if (entry->src.stride * inst->src[arg].stride > 4)
+      return false;
+
+   /* Bail if the result of composing both strides cannot be expressed
+    * as another stride. This avoids, for example, trying to transform
+    * this:
+    *
+    *     MOV (8) rX<1>UD rY<0;1,0>UD
+    *     FOO (8) ...     rX<8;8,1>UW
+    *
+    * into this:
+    *
+    *     FOO (8) ...     rY<0;1,0>UW
+    *
+    * Which would have different semantics.
+    */
+   if (entry->src.stride != 1 &&
+       (inst->src[arg].stride *
+        type_sz(inst->src[arg].type)) % type_sz(entry->src.type) != 0)
       return false;
 
    if (has_source_modifiers && entry->dst.type != inst->src[arg].type)
@@ -310,6 +337,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
    if (entry->src.smear != -1)
       inst->src[arg].smear = entry->src.smear;
    inst->src[arg].subreg_offset = entry->src.subreg_offset;
+   inst->src[arg].stride *= entry->src.stride;
 
    if (!inst->src[arg].abs) {
       inst->src[arg].abs = entry->src.abs;
@@ -332,7 +360,9 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
       if (inst->src[i].file != entry->dst.file ||
           inst->src[i].reg != entry->dst.reg ||
           inst->src[i].reg_offset != entry->dst.reg_offset ||
-          inst->src[i].subreg_offset != entry->dst.subreg_offset)
+          inst->src[i].subreg_offset != entry->dst.subreg_offset ||
+          inst->src[i].type != entry->dst.type ||
+          inst->src[i].stride > 1)
          continue;
 
       /* Don't bother with cases that should have been taken care of by the