summary | refs | log | tree | commit | diff | stats
path: root/src/mesa
diff options
context:
space:
mode:
author Kenneth Graunke <kenneth@whitecape.org> 2015-09-24 23:47:29 -0700
committer Kenneth Graunke <kenneth@whitecape.org> 2015-09-26 12:02:31 -0700
commit 08fe5799e61e9251dec163d000709ff33434216d (patch)
tree f68270587ceee0dd9a2b1ab7228a417a5d97d5d5 /src/mesa
parent f0a618ee7c26a3dd54292fbc2bfd914b0d680ed9 (diff)
download external_mesa3d-08fe5799e61e9251dec163d000709ff33434216d.zip
external_mesa3d-08fe5799e61e9251dec163d000709ff33434216d.tar.gz
external_mesa3d-08fe5799e61e9251dec163d000709ff33434216d.tar.bz2
i965/gs: Allow src0 immediates in GS_OPCODE_SET_WRITE_OFFSET.
GS_OPCODE_SET_WRITE_OFFSET is a MUL with a constant src[1] and special strides. We can easily make the generator handle constant src[0] arguments by instead generating a MOV with the product of both operands.

This isn't necessarily a win in and of itself - instead of a MUL, we generate a MOV, which should be basically the same cost. However, we can probably avoid the earlier MOV to put src[0] into a register.

shader-db statistics for geometry shaders only:

total instructions in shared programs: 3207 -> 3173 (-1.06%)
instructions in affected programs: 3207 -> 3173 (-1.06%)
helped: 11

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Diffstat (limited to 'src/mesa')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp 7
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 9
2 files changed, 14 insertions, 2 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index 5b6444e..610caef 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -202,6 +202,13 @@ try_constant_propagate(const struct brw_device_info *devinfo,
return true;
}
break;
+ case GS_OPCODE_SET_WRITE_OFFSET:
+ /* This is just a multiply by a constant with special strides.
+ * The generator will handle immediates in both arguments (generating
+ * a single MOV of the product). So feel free to propagate in src0.
+ */
+ inst->src[arg] = value;
+ return true;
case BRW_OPCODE_CMP:
if (arg == 1) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 50f9663..dcacc90 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -541,8 +541,13 @@ vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
src1.file == BRW_IMMEDIATE_VALUE &&
src1.type == BRW_REGISTER_TYPE_UD &&
src1.dw1.ud <= USHRT_MAX);
- brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
- retype(src1, BRW_REGISTER_TYPE_UW));
+ if (src0.file == IMM) {
+ brw_MOV(p, suboffset(stride(dst, 2, 2, 1), 3),
+ brw_imm_ud(src0.dw1.ud * src1.dw1.ud));
+ } else {
+ brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
+ retype(src1, BRW_REGISTER_TYPE_UW));
+ }
brw_set_default_access_mode(p, BRW_ALIGN_16);
brw_pop_insn_state(p);
}