diff options
author | Matt Turner <mattst88@gmail.com> | 2016-01-21 09:10:09 -0800 |
---|---|---|
committer | Matt Turner <mattst88@gmail.com> | 2016-03-04 11:52:34 -0800 |
commit | 1f862e923cba1d5cd54a707f70f0be113635e855 (patch) | |
tree | 076cbf6c8bc3d091ac7b2703acc17c1f9314143f /src/mesa | |
parent | 905ff861982450831a56d112036f68a751337441 (diff) | |
download | external_mesa3d-1f862e923cba1d5cd54a707f70f0be113635e855.zip external_mesa3d-1f862e923cba1d5cd54a707f70f0be113635e855.tar.gz external_mesa3d-1f862e923cba1d5cd54a707f70f0be113635e855.tar.bz2 |
i965/fs: Optimize float conversions of byte/word extract.
instructions in affected programs: 31535 -> 29966 (-4.98%)
helped: 23
cycles in affected programs: 272648 -> 266022 (-2.43%)
helped: 14
HURT: 1
The patch decreases the number of instructions in the two Unigine
programs by:
#1721: 4374 -> 4155 instructions (-5.01%)
#1706: 3582 -> 3363 instructions (-6.11%)
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Diffstat (limited to 'src/mesa')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 46 |
2 files changed, 48 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 7446ca1..21c7813 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -272,6 +272,8 @@ public: void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst, unsigned wr_mask); + bool optimize_extract_to_float(nir_alu_instr *instr, + const fs_reg &result); bool optimize_frontfacing_ternary(nir_alu_instr *instr, const fs_reg &result); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index db20c71..04e9b8f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -500,6 +500,49 @@ fs_visitor::nir_emit_instr(nir_instr *instr) } } +/** + * Recognizes a parent instruction of nir_op_extract_* and changes the type to + * match instr. + * + * Returns false without emitting anything unless src[0] of instr is an SSA + * value whose parent instruction is an ALU instruction with op + * nir_op_extract_u8, nir_op_extract_i8, nir_op_extract_u16 or + * nir_op_extract_i16. Otherwise emits SHADER_OPCODE_EXTRACT_BYTE (8-bit ops) + * or SHADER_OPCODE_EXTRACT_WORD (16-bit ops) reading the extract's own + * src[0] and writing directly to result, hands instr->dest.saturate to + * set_saturate() for the emitted instruction, and returns true. + * + * The element index is taken from the extract's src[1] as a constant; it is + * only guarded by assert() and then dereferenced unconditionally. + * NOTE(review): presumably NIR guarantees src[1] is constant for these ops; + * confirm for builds where assert() is compiled out. + */ +bool +fs_visitor::optimize_extract_to_float(nir_alu_instr *instr, + const fs_reg &result) +{ + if (!instr->src[0].src.is_ssa || + !instr->src[0].src.ssa->parent_instr) + return false; + + if (instr->src[0].src.ssa->parent_instr->type != nir_instr_type_alu) + return false; + + nir_alu_instr *src0 = + nir_instr_as_alu(instr->src[0].src.ssa->parent_instr); + + if (src0->op != nir_op_extract_u8 && src0->op != nir_op_extract_u16 && + src0->op != nir_op_extract_i8 && src0->op != nir_op_extract_i16) + return false; + + nir_const_value *element = nir_src_as_const_value(src0->src[1].src); + assert(element != NULL); + + enum opcode extract_op; + if (src0->op == nir_op_extract_u16 || src0->op == nir_op_extract_i16) { + assert(element->u[0] <= 1); + extract_op = SHADER_OPCODE_EXTRACT_WORD; + } else { + assert(element->u[0] <= 3); + extract_op = SHADER_OPCODE_EXTRACT_BYTE; + } + + fs_reg op0 = get_nir_src(src0->src[0].src); + op0.type = brw_type_for_nir_type(nir_op_infos[src0->op].input_types[0]); + op0 = offset(op0, bld, src0->src[0].swizzle[0]); + + set_saturate(instr->dest.saturate, + bld.emit(extract_op, result, op0, 
brw_imm_ud(element->u[0]))); + return true; +} + bool fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr, const fs_reg &result) @@ -671,6 +714,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) switch (instr->op) { case nir_op_i2f: case nir_op_u2f: + if (optimize_extract_to_float(instr, result)) + return; + inst = bld.MOV(result, op[0]); inst->saturate = instr->dest.saturate; break; |