summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
diff options
context:
space:
mode:
authorConnor Abbott <connor.w.abbott@intel.com>2015-08-14 12:00:13 -0700
committerIago Toral Quiroga <itoral@igalia.com>2015-11-23 08:30:30 +0100
commitd982922b184930a4ceed1d97b772cce5c371865d (patch)
tree9d0bd5957a0ac3b75b971caa3223c21c6ea89b81 /src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
parent95ac3b1daeaa7d40d49fa2e0bdef46346c2996d5 (diff)
downloadexternal_mesa3d-d982922b184930a4ceed1d97b772cce5c371865d.zip
external_mesa3d-d982922b184930a4ceed1d97b772cce5c371865d.tar.gz
external_mesa3d-d982922b184930a4ceed1d97b772cce5c371865d.tar.bz2
i965/fs: add stride restrictions for copy propagation
There are various restrictions on what the hstride can be that depend on the Gen, and now that we're using hstride == 2 for packing/unpacking doubles, we're going to run into these restrictions a lot more often. Pull them out into a separate function, and move the one restriction we checked previously into it. Reviewed-by: Matt Turner <mattst88@gmail.com>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp')
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp56
1 files changed, 55 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 426ea57..62ae9ab 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -275,6 +275,59 @@ is_logic_op(enum opcode opcode)
opcode == BRW_OPCODE_NOT);
}
+static bool
+can_take_stride(fs_inst *inst, unsigned arg, unsigned stride,
+ const brw_device_info *devinfo)
+{
+ if (stride > 4)
+ return false;
+
+ /* 3-source instructions can only be Align16, which restricts what strides
+ * they can take. They can only take a stride of 1 (the usual case), or 0
+ * with a special "repctrl" bit. But the repctrl bit doesn't work for
+ * 64-bit datatypes, so if the source type is 64-bit then only a stride of
+ * 1 is allowed. From the Broadwell PRM, Volume 7 "3D Media GPGPU", page
+ * 944:
+ *
+ * This is applicable to 32b datatypes and 16b datatype. 64b datatypes
+ * cannot use the replicate control.
+ */
+ if (inst->is_3src()) {
+ if (type_sz(inst->src[arg].type) > 4)
+ return stride == 1;
+ else
+ return stride == 1 || stride == 0;
+ }
+
+ /* From the Broadwell PRM, Volume 2a "Command Reference - Instructions",
+ * page 391 ("Extended Math Function"):
+ *
+ * The following restrictions apply for align1 mode: Scalar source is
+ * supported. Source and destination horizontal stride must be the
+ * same.
+ *
+ * From the Haswell PRM Volume 2b "Command Reference - Instructions", page
+ * 134 ("Extended Math Function"):
+ *
+ * Scalar source is supported. Source and destination horizontal stride
+ * must be 1.
+ *
+ * and similar language exists for IVB and SNB. Pre-SNB, math instructions
+ * are sends, so the sources are moved to MRF's and there are no
+ * restrictions.
+ */
+ if (inst->is_math()) {
+ if (devinfo->gen == 6 || devinfo->gen == 7) {
+ assert(inst->dst.stride == 1);
+ return stride == 1 || stride == 0;
+ } else if (devinfo->gen >= 8) {
+ return stride == inst->dst.stride || stride == 0;
+ }
+ }
+
+ return true;
+}
+
bool
fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
{
@@ -326,7 +379,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
/* Bail if the result of composing both strides would exceed the
* hardware limit.
*/
- if (entry->src.stride * inst->src[arg].stride > 4)
+ if (!can_take_stride(inst, arg, entry->src.stride * inst->src[arg].stride,
+ devinfo))
return false;
/* Bail if the instruction type is larger than the execution type of the