diff options
author | Karol Herbst <nouveau@karolherbst.de> | 2016-01-27 18:25:08 +0100 |
---|---|---|
committer | Ilia Mirkin <imirkin@alum.mit.edu> | 2016-01-28 15:59:41 -0500 |
commit | 29d09f8747abea35f4deadced0196725d4ab89cf (patch) | |
tree | 4962a841506fac3483f0a974bad1f2be359357a1 /src | |
parent | 3aa681449ed030ba8b9c56f0a6f2b08bd1fb15a6 (diff) | |
download | external_mesa3d-29d09f8747abea35f4deadced0196725d4ab89cf.zip external_mesa3d-29d09f8747abea35f4deadced0196725d4ab89cf.tar.gz external_mesa3d-29d09f8747abea35f4deadced0196725d4ab89cf.tar.bz2 |
nv50/ir: optimize mad/fma with third argument 0 to mul
Very modest effect, but it's clearly the right thing to do.
total instructions in shared programs : 6131491 -> 6131398 (-0.00%)
total gprs used in shared programs : 910157 -> 910131 (-0.00%)
total local used in shared programs : 15328 -> 15328 (0.00%)
         local   gpr  inst  bytes
helped       0    55    85     85
hurt         0    26    20     20
Signed-off-by: Karol Herbst <nouveau@karolherbst.de>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index bda9c7d..eb790d0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -336,6 +336,7 @@ private:
    void expr(Instruction *, ImmediateValue&, ImmediateValue&);
    void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&);
    void opnd(Instruction *, ImmediateValue&, int s);
+   void opnd3(Instruction *, ImmediateValue&);
 
    void unary(Instruction *, const ImmediateValue&);
 
@@ -388,6 +389,8 @@ ConstantFolding::visit(BasicBlock *bb)
       else
       if (i->srcExists(1) && i->src(1).getImmediate(src1))
          opnd(i, src1, 1);
+      if (i->srcExists(2) && i->src(2).getImmediate(src2))
+         opnd3(i, src2);
    }
    return true;
 }
@@ -873,6 +876,24 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
 }
 
 void
+ConstantFolding::opnd3(Instruction *i, ImmediateValue &imm2)
+{
+   switch (i->op) {
+   case OP_MAD:
+   case OP_FMA:
+      if (imm2.isInteger(0)) {
+         i->op = OP_MUL;
+         i->setSrc(2, NULL);
+         foldCount++;
+         return;
+      }
+      break;
+   default:
+      return;
+   }
+}
+
+void
 ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
 {
    const int t = !s;