summary | refs | log | tree | commit | diff | stats
path: root/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
diff options
context:
space:
mode:
author: Francisco Jerez <currojerez@riseup.net> 2013-12-08 04:57:35 +0100
committer: Francisco Jerez <currojerez@riseup.net> 2014-02-12 23:07:57 +0100
commit: 756d37b1d6d09ad7ee3b8835888a49d4256e427b (patch)
tree: 14c42ac76c4de97878daf36da4953868288323e3 /src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
parent: 4c7206bafdd7bde7617e14840812e43459682718 (diff)
download: external_mesa3d-756d37b1d6d09ad7ee3b8835888a49d4256e427b.zip
external_mesa3d-756d37b1d6d09ad7ee3b8835888a49d4256e427b.tar.gz
external_mesa3d-756d37b1d6d09ad7ee3b8835888a49d4256e427b.tar.bz2
i965/fs: Add support for specifying register horizontal strides.
v2: Some improvements for copy propagation with non-contiguous register strides and mismatching types.
v3: Add example of the situation that the copy propagation changes are intended to avoid. Clarify that 'fs_reg::apply_stride()' is expected to work with zero strides too.

Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Paul Berry <stereotype441@gmail.com>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 36
1 files changed, 33 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 5cf019c..cdb7b80 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -279,7 +279,9 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
if (entry->src.file == IMM)
return false;
- if (inst->regs_read(this, arg) > 1)
+ /* Bail if inst is reading more than entry is writing. */
+ if ((inst->regs_read(this, arg) * inst->src[arg].stride *
+ type_sz(inst->src[arg].type)) > type_sz(entry->dst.type))
return false;
if (inst->src[arg].file != entry->dst.file ||
@@ -298,7 +300,32 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
bool has_source_modifiers = entry->src.abs || entry->src.negate;
if ((has_source_modifiers || entry->src.file == UNIFORM ||
- entry->src.smear != -1) && !can_do_source_mods(inst))
+ entry->src.smear != -1 || !entry->src.is_contiguous()) &&
+ !can_do_source_mods(inst))
+ return false;
+
+ /* Bail if the result of composing both strides would exceed the
+ * hardware limit.
+ */
+ if (entry->src.stride * inst->src[arg].stride > 4)
+ return false;
+
+ /* Bail if the result of composing both strides cannot be expressed
+ * as another stride. This avoids, for example, trying to transform
+ * this:
+ *
+ * MOV (8) rX<1>UD rY<0;1,0>UD
+ * FOO (8) ... rX<8;8,1>UW
+ *
+ * into this:
+ *
+ * FOO (8) ... rY<0;1,0>UW
+ *
+ * Which would have different semantics.
+ */
+ if (entry->src.stride != 1 &&
+ (inst->src[arg].stride *
+ type_sz(inst->src[arg].type)) % type_sz(entry->src.type) != 0)
return false;
if (has_source_modifiers && entry->dst.type != inst->src[arg].type)
@@ -310,6 +337,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
if (entry->src.smear != -1)
inst->src[arg].smear = entry->src.smear;
inst->src[arg].subreg_offset = entry->src.subreg_offset;
+ inst->src[arg].stride *= entry->src.stride;
if (!inst->src[arg].abs) {
inst->src[arg].abs = entry->src.abs;
@@ -332,7 +360,9 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
if (inst->src[i].file != entry->dst.file ||
inst->src[i].reg != entry->dst.reg ||
inst->src[i].reg_offset != entry->dst.reg_offset ||
- inst->src[i].subreg_offset != entry->dst.subreg_offset)
+ inst->src[i].subreg_offset != entry->dst.subreg_offset ||
+ inst->src[i].type != entry->dst.type ||
+ inst->src[i].stride > 1)
continue;
/* Don't bother with cases that should have been taken care of by the