summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorKarol Herbst <nouveau@karolherbst.de>2016-01-27 18:25:08 +0100
committerIlia Mirkin <imirkin@alum.mit.edu>2016-01-28 15:59:41 -0500
commit29d09f8747abea35f4deadced0196725d4ab89cf (patch)
tree4962a841506fac3483f0a974bad1f2be359357a1 /src
parent3aa681449ed030ba8b9c56f0a6f2b08bd1fb15a6 (diff)
downloadexternal_mesa3d-29d09f8747abea35f4deadced0196725d4ab89cf.zip
external_mesa3d-29d09f8747abea35f4deadced0196725d4ab89cf.tar.gz
external_mesa3d-29d09f8747abea35f4deadced0196725d4ab89cf.tar.bz2
nv50/ir: optimize mad/fma with third argument 0 to mul
Very modest effect, but it's clearly the right thing to do. total instructions in shared programs : 6131491 -> 6131398 (-0.00%) total gprs used in shared programs : 910157 -> 910131 (-0.00%) total local used in shared programs : 15328 -> 15328 (0.00%) local gpr inst bytes helped 0 55 85 85 hurt 0 26 20 20 Signed-off-by: Karol Herbst <nouveau@karolherbst.de> Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp21
1 files changed, 21 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index bda9c7d..eb790d0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -336,6 +336,7 @@ private:
void expr(Instruction *, ImmediateValue&, ImmediateValue&);
void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&);
void opnd(Instruction *, ImmediateValue&, int s);
+ void opnd3(Instruction *, ImmediateValue&);
void unary(Instruction *, const ImmediateValue&);
@@ -388,6 +389,8 @@ ConstantFolding::visit(BasicBlock *bb)
else
if (i->srcExists(1) && i->src(1).getImmediate(src1))
opnd(i, src1, 1);
+ if (i->srcExists(2) && i->src(2).getImmediate(src2))
+ opnd3(i, src2);
}
return true;
}
@@ -873,6 +876,24 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
}
void
+ConstantFolding::opnd3(Instruction *i, ImmediateValue &imm2)
+{
+ switch (i->op) {
+ case OP_MAD:
+ case OP_FMA:
+ if (imm2.isInteger(0)) {
+ i->op = OP_MUL;
+ i->setSrc(2, NULL);
+ foldCount++;
+ return;
+ }
+ break;
+ default:
+ return;
+ }
+}
+
+void
ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
{
const int t = !s;