diff options
author | Francisco Jerez <currojerez@riseup.net> | 2013-12-08 04:57:35 +0100 |
---|---|---|
committer | Francisco Jerez <currojerez@riseup.net> | 2014-02-12 23:07:57 +0100 |
commit | 756d37b1d6d09ad7ee3b8835888a49d4256e427b (patch) | |
tree | 14c42ac76c4de97878daf36da4953868288323e3 /src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | |
parent | 4c7206bafdd7bde7617e14840812e43459682718 (diff) | |
download | external_mesa3d-756d37b1d6d09ad7ee3b8835888a49d4256e427b.zip external_mesa3d-756d37b1d6d09ad7ee3b8835888a49d4256e427b.tar.gz external_mesa3d-756d37b1d6d09ad7ee3b8835888a49d4256e427b.tar.bz2 |
i965/fs: Add support for specifying register horizontal strides.

v2: Some improvements for copy propagation with non-contiguous
    register strides and mismatching types.
v3: Add an example of the situation that the copy propagation changes
    are intended to avoid. Clarify that 'fs_reg::apply_stride()' is
    expected to work with zero strides too.
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Paul Berry <stereotype441@gmail.com>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 36 |
1 file changed, 33 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 5cf019c..cdb7b80 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -279,7 +279,9 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) if (entry->src.file == IMM) return false; - if (inst->regs_read(this, arg) > 1) + /* Bail if inst is reading more than entry is writing. */ + if ((inst->regs_read(this, arg) * inst->src[arg].stride * + type_sz(inst->src[arg].type)) > type_sz(entry->dst.type)) return false; if (inst->src[arg].file != entry->dst.file || @@ -298,7 +300,32 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) bool has_source_modifiers = entry->src.abs || entry->src.negate; if ((has_source_modifiers || entry->src.file == UNIFORM || - entry->src.smear != -1) && !can_do_source_mods(inst)) + entry->src.smear != -1 || !entry->src.is_contiguous()) && + !can_do_source_mods(inst)) + return false; + + /* Bail if the result of composing both strides would exceed the + * hardware limit. + */ + if (entry->src.stride * inst->src[arg].stride > 4) + return false; + + /* Bail if the result of composing both strides cannot be expressed + * as another stride. This avoids, for example, trying to transform + * this: + * + * MOV (8) rX<1>UD rY<0;1,0>UD + * FOO (8) ... rX<8;8,1>UW + * + * into this: + * + * FOO (8) ... rY<0;1,0>UW + * + * Which would have different semantics. 
+ */ + if (entry->src.stride != 1 && + (inst->src[arg].stride * + type_sz(inst->src[arg].type)) % type_sz(entry->src.type) != 0) return false; if (has_source_modifiers && entry->dst.type != inst->src[arg].type) @@ -310,6 +337,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) if (entry->src.smear != -1) inst->src[arg].smear = entry->src.smear; inst->src[arg].subreg_offset = entry->src.subreg_offset; + inst->src[arg].stride *= entry->src.stride; if (!inst->src[arg].abs) { inst->src[arg].abs = entry->src.abs; @@ -332,7 +360,9 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) if (inst->src[i].file != entry->dst.file || inst->src[i].reg != entry->dst.reg || inst->src[i].reg_offset != entry->dst.reg_offset || - inst->src[i].subreg_offset != entry->dst.subreg_offset) + inst->src[i].subreg_offset != entry->dst.subreg_offset || + inst->src[i].type != entry->dst.type || + inst->src[i].stride > 1) continue; /* Don't bother with cases that should have been taken care of by the |