summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
diff options
context:
space:
mode:
authorNeil Roberts <neil@linux.intel.com>2015-05-28 15:27:31 +0100
committerNeil Roberts <neil@linux.intel.com>2015-05-31 00:48:57 +0100
commit6c846dc57b1d6f3e015a604dba1976f96c4be9e9 (patch)
tree9fe7e442a103777003ab60d237895182cd1c22e9 /src/mesa/drivers/dri/i965/brw_fs_generator.cpp
parentec1c72d38ea4c709a39c6be9e0ff96bc2a90940f (diff)
downloadexternal_mesa3d-6c846dc57b1d6f3e015a604dba1976f96c4be9e9.zip
external_mesa3d-6c846dc57b1d6f3e015a604dba1976f96c4be9e9.tar.gz
external_mesa3d-6c846dc57b1d6f3e015a604dba1976f96c4be9e9.tar.bz2
i965: Don't use a temporary when generating an indirect sample
Previously when generating the send instruction for a sample instruction with an indirect sampler it would use the destination register as a temporary store. This breaks when used in combination with the opt_sampler_eot optimisation because that forces the destination to be null. This patch fixes that by avoiding the temp register altogether. The reason the temporary register was needed was because it was trying to ensure the binding table index doesn't overflow a byte by and'ing it with 0xff. The result is then or'd with samper_index<<8. This patch instead just and's the whole thing by 0xfff. This will ensure that a bogus sampler index won't overflow into the rest of the message descriptor but unlike the previous code it won't ensure that the binding table index doesn't overflow into the sampler index. It doesn't seem like that should matter very much though because if the shader is generating a bogus sampler index then it's going to just get garbage out either way. Instead of doing sampler_index<<8|(sampler_index+base_table_index) the new code avoids one operation by doing sampler_index*0x101+base_table_index which should be equivalent. However if we wanted to avoid the multiply for some reason we could do this by adding an extra or instruction still without needing the temporary register. This fixes a number of Piglit tests on Skylake that were using indirect samplers such as: spec@arb_gpu_shader5@execution@sampler_array_indexing@fs-simple Reviewed-by: Matt Turner <mattst88@gmail.com> Reviewed-by: Chris Forbes <chrisf@ijw.co.nz> Acked-by: Ben Widawsky <ben@bwidawsk.net> Tested-by: Anuj Phogat <anuj.phogat@gmail.com>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_generator.cpp')
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_generator.cpp17
1 files changed, 4 insertions, 13 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 0be0f86..ea46b1a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -779,27 +779,18 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
brw_mark_surface_used(prog_data, sampler + base_binding_table_index);
} else {
/* Non-const sampler index */
- /* Note: this clobbers `dst` as a temporary before emitting the send */
struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
- struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD));
-
struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD));
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_access_mode(p, BRW_ALIGN_1);
- /* Some care required: `sampler` and `temp` may alias:
- * addr = sampler & 0xff
- * temp = (sampler << 8) & 0xf00
- * addr = addr | temp
- */
- brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index));
- brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u));
- brw_AND(p, temp, temp, brw_imm_ud(0x0f00));
- brw_AND(p, addr, addr, brw_imm_ud(0x0ff));
- brw_OR(p, addr, addr, temp);
+ /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */
+ brw_MUL(p, addr, sampler_reg, brw_imm_ud(0x101));
+ brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
+ brw_AND(p, addr, addr, brw_imm_ud(0xfff));
brw_pop_insn_state(p);