summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
diff options
context:
space:
mode:
authorMatt Turner <mattst88@gmail.com>2015-10-14 02:23:25 -0700
committerMatt Turner <mattst88@gmail.com>2015-10-19 10:19:32 -0700
commit35a2d259f27f5b41f29a1112ca48093dac09c364 (patch)
treec73a4009a7abb6cdea670f26a996fdb31cb22352 /src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
parent9e17c36b8ba79e688011a5fd293ad5f42da21b66 (diff)
downloadexternal_mesa3d-35a2d259f27f5b41f29a1112ca48093dac09c364.zip
external_mesa3d-35a2d259f27f5b41f29a1112ca48093dac09c364.tar.gz
external_mesa3d-35a2d259f27f5b41f29a1112ca48093dac09c364.tar.bz2
i965/fs: Consider type mismatches in saturate propagation.
NIR considers bcsel to produce and consume unsigned types, leading to SEL instructions operating on unsigned types when the data is really floating-point. Previous to this patch, saturate propagation would happily transform (+f0) sel g20:UD, g30:UD, g40:UD mov.sat g50:F, g20:F into (+f0) sel.sat g20:UD, g30:UD, g40:UD mov g50:F, g20:F But since the meaning of .sat is dependent on the type of the destination register, this is not valid. Instead, allow saturate propagation to change the types of dest/source on instructions that are simply copying data in order to propagate the saturate modifier. Fixes bad code gen in 158 programs. Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp')
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp15
1 files changed, 12 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
index e406c28..8792a8c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
@@ -52,11 +52,12 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
ip--;
if (inst->opcode != BRW_OPCODE_MOV ||
+ !inst->saturate ||
inst->dst.file != GRF ||
+ inst->dst.type != inst->src[0].type ||
inst->src[0].file != GRF ||
inst->src[0].abs ||
- inst->src[0].negate ||
- !inst->saturate)
+ inst->src[0].negate)
continue;
int src_var = v->live_intervals->var_from_reg(inst->src[0]);
@@ -65,7 +66,9 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
bool interfered = false;
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst, block) {
if (scan_inst->overwrites_reg(inst->src[0])) {
- if (scan_inst->is_partial_write())
+ if (scan_inst->is_partial_write() ||
+ (scan_inst->dst.type != inst->dst.type &&
+ !scan_inst->can_change_types()))
break;
if (scan_inst->saturate) {
@@ -73,6 +76,12 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
progress = true;
} else if (src_end_ip <= ip || inst->dst.equals(inst->src[0])) {
if (scan_inst->can_do_saturate()) {
+ if (scan_inst->dst.type != inst->dst.type) {
+ scan_inst->dst.type = inst->dst.type;
+ for (int i = 0; i < scan_inst->sources; i++) {
+ scan_inst->src[i].type = inst->dst.type;
+ }
+ }
scan_inst->saturate = true;
inst->saturate = false;
progress = true;