summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_compiler.h
diff options
context:
space:
mode:
authorFrancisco Jerez <currojerez@riseup.net>2016-09-15 17:20:23 -0700
committerFrancisco Jerez <currojerez@riseup.net>2016-09-21 13:45:46 +0300
commitf57f526fc5cfaedf26b2becf8f1899d5de0d0461 (patch)
tree5bbd786fffec71aac38a61002d85bf69a175e443 /src/mesa/drivers/dri/i965/brw_compiler.h
parent8a468d186e6fc27c26dd12ba989192e7596f667a (diff)
downloadexternal_mesa3d-f57f526fc5cfaedf26b2becf8f1899d5de0d0461.zip
external_mesa3d-f57f526fc5cfaedf26b2becf8f1899d5de0d0461.tar.gz
external_mesa3d-f57f526fc5cfaedf26b2becf8f1899d5de0d0461.tar.bz2
i965/ir: Skip eliminate_find_live_channel() for stages with sparse thread dispatch.
The eliminate_find_live_channel optimization eliminates FIND_LIVE_CHANNEL instructions in cases where control flow is known to be uniform, and replaces them with 'MOV 0', which in turn unblocks subsequent elimination of the BROADCAST instruction frequently used on the result of FIND_LIVE_CHANNEL. This is however not correct in per-sample fragment shader dispatch because the PSD can dispatch a fully unlit sample under certain conditions. Disable the optimization in that case.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>

v2: Add devinfo argument to brw_stage_has_packed_dispatch() to implement hardware generation check.
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_compiler.h')
-rw-r--r--src/mesa/drivers/dri/i965/brw_compiler.h49
1 files changed, 49 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index 84d3dde..445c166 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -868,6 +868,55 @@ encode_slm_size(unsigned gen, uint32_t bytes)
return slm_size;
}
+/**
+ * Return true if the given shader stage is dispatched contiguously by the
+ * relevant fixed function starting from channel 0 of the SIMD thread, which
+ * implies that the dispatch mask of a thread can be assumed to have the form
+ * '2^n - 1' for some n.
+ *
+ * devinfo is only consulted by the hardware generation assertion at the top
+ * of the function (gen <= 9).  stage selects which case below applies.
+ * prog_data is only dereferenced for MESA_SHADER_FRAGMENT, where it is cast
+ * to brw_wm_prog_data in order to read persample_dispatch.
+ *
+ * The result is false only for fragment shaders that have persample_dispatch
+ * set; every other stage, including compute, yields true.
+ */
+static inline bool
+brw_stage_has_packed_dispatch(const struct gen_device_info *devinfo,
+ gl_shader_stage stage,
+ const struct brw_stage_prog_data *prog_data)
+{
+ /* The code below makes assumptions about the hardware's thread dispatch
+ * behavior that could be proven wrong in future generations -- Make sure
+ * to do a full test run with brw_fs_test_dispatch_packing() hooked up to
+ * the NIR front-end before changing this assertion.
+ */
+ assert(devinfo->gen <= 9);
+
+ switch (stage) {
+ case MESA_SHADER_FRAGMENT: {
+ /* The PSD discards subspans coming in with no lit samples, which in the
+ * per-pixel shading case implies that each subspan will either be fully
+ * lit (due to the VMask being used to allow derivative computations),
+ * or not dispatched at all. In per-sample dispatch mode individual
+ * samples from the same subspan have a fixed relative location within
+ * the SIMD thread, so dispatch of unlit samples cannot be avoided in
+ * general and we should return false.
+ */
+ const struct brw_wm_prog_data *wm_prog_data =
+ (const struct brw_wm_prog_data *)prog_data;
+ return !wm_prog_data->persample_dispatch;
+ }
+ case MESA_SHADER_COMPUTE:
+ /* Compute shaders will be spawned with either a fully enabled dispatch
+ * mask or with whatever bottom/right execution mask was given to the
+ * GPGPU walker command to be used along the workgroup edges -- In both
+ * cases the dispatch mask is required to be tightly packed for our
+ * invocation index calculations to work.
+ */
+ return true;
+ default:
+ /* Most remaining fixed functions are limited to use a packed dispatch
+ * mask due to the hardware representation of the dispatch mask as a
+ * single counter representing the number of enabled channels.
+ */
+ return true;
+ }
+}
+
#ifdef __cplusplus
} /* extern "C" */
#endif