summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>2016-10-07 01:16:24 +0200
committerSamuel Pitoiset <samuel.pitoiset@gmail.com>2016-10-12 17:46:03 +0200
commit87b06cab14c449e442be27650024f044e93c9a7c (patch)
treefaca7df0af71290d3a0f0c94f32031fd2d9baa07 /src/gallium/drivers
parent85ba409967bb0327b85460639080214b3997fc17 (diff)
downloadexternal_mesa3d-87b06cab14c449e442be27650024f044e93c9a7c.zip
external_mesa3d-87b06cab14c449e442be27650024f044e93c9a7c.tar.gz
external_mesa3d-87b06cab14c449e442be27650024f044e93c9a7c.tar.bz2
nv50/ir: optimize ADD(SHL(a, b), c) to SHLADD(a, b, c)
total instructions in shared programs : 2286901 -> 2284473 (-0.11%)
total gprs used in shared programs    : 335256 -> 335273 (0.01%)
total local used in shared programs   : 31968 -> 31968 (0.00%)

                local        gpr       inst      bytes
    helped           0         41        852        852
      hurt           0         44         23         23

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp87
1 files changed, 87 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 6efb29e..d88bb34 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2132,6 +2132,92 @@ AlgebraicOpt::visit(BasicBlock *bb)
// =============================================================================
+// ADD(SHL(a, b), c) -> SHLADD(a, b, c)
+class LateAlgebraicOpt : public Pass
+{
+private:
+   // Attempts the ADD(SHL(a, b), c) -> SHLADD(a, b, c) rewrite on an ADD;
+   // returns true when the instruction was changed.
+   bool tryADDToSHLADD(Instruction *);
+
+   // Per-opcode handler invoked from visit() for OP_ADD instructions.
+   void handleADD(Instruction *);
+
+   // Per-instruction entry point of the pass; dispatches on the opcode.
+   virtual bool visit(Instruction *);
+};
+
+// Filters ADD instructions before attempting the SHLADD rewrite: both
+// sources must live in the GPR file and the target must report OP_SHLADD as
+// supported for the ADD's destination type.
+void
+LateAlgebraicOpt::handleADD(Instruction *add)
+{
+   const bool bothInGPRs = add->getSrc(0)->reg.file == FILE_GPR &&
+                           add->getSrc(1)->reg.file == FILE_GPR;
+   if (!bothInGPRs)
+      return;
+
+   if (!prog->getTarget()->isOpSupported(OP_SHLADD, add->dType))
+      return;
+
+   // The result is intentionally unused: nothing else depends on whether
+   // the rewrite actually happened.
+   tryADDToSHLADD(add);
+}
+
+// ADD(SHL(a, b), c) -> SHLADD(a, b, c)
+//
+// If one source of @add is defined by a SHL with an immediate shift amount,
+// rewrite @add in place into a SHLADD that takes the SHL's first source, the
+// shift amount as a fresh immediate, and the other ADD operand as addend.
+//
+// The rewrite is refused when the ADD saturates, uses flags, has an 8-byte
+// or floating point destination type, or when the SHL lives in another
+// basic block, uses flags, has a subOp, or has a non-immediate shift amount.
+//
+// Returns true if @add was rewritten, false if it was left untouched.
+bool
+LateAlgebraicOpt::tryADDToSHLADD(Instruction *add)
+{
+   Value *src0 = add->getSrc(0);
+   Value *src1 = add->getSrc(1);
+   ImmediateValue imm;
+   Instruction *shl;
+   Modifier mod[2];
+   int s;
+
+   if (add->saturate || add->usesFlags() || typeSizeof(add->dType) == 8
+       || isFloatType(add->dType))
+      return false;
+
+   // Find which ADD operand (if any) is produced by a SHL; s is its index.
+   if (src0->getUniqueInsn() && src0->getUniqueInsn()->op == OP_SHL)
+      s = 0;
+   else
+   if (src1->getUniqueInsn() && src1->getUniqueInsn()->op == OP_SHL)
+      s = 1;
+   else
+      return false;
+
+   shl = add->getSrc(s)->getUniqueInsn();
+
+   // NOTE(review): a modifier on the SHL's own first source is not carried
+   // over below; confirm SHL sources cannot have modifiers at this point.
+   if (shl->bb != add->bb || shl->usesFlags() || shl->subOp)
+      return false;
+
+   if (!shl->src(1).getImmediate(imm))
+      return false;
+
+   // Snapshot the ADD's source modifiers before the operands are shuffled.
+   mod[0] = add->src(0).mod;
+   mod[1] = add->src(1).mod;
+
+   add->op = OP_SHLADD;
+
+   // The non-SHL operand becomes the addend and keeps its own modifier.
+   add->setSrc(2, add->src(!s));
+   add->src(2).mod = mod[!s];
+
+   // The modifier that applied to the SHL result now applies to the shifted
+   // operand, whichever slot the SHL originally occupied.
+   add->setSrc(0, shl->getSrc(0));
+   add->src(0).mod = mod[s];
+
+   // The shift amount is a plain immediate without modifiers.
+   add->setSrc(1, new_ImmediateValue(shl->bb->getProgram(), imm.reg.data.u32));
+   add->src(1).mod = Modifier(0);
+
+   return true;
+}
+
+// Visits a single instruction: OP_ADD is forwarded to handleADD(), every
+// other opcode is ignored. Always returns true.
+bool
+LateAlgebraicOpt::visit(Instruction *i)
+{
+   if (i->op == OP_ADD)
+      handleADD(i);
+
+   return true;
+}
+
+// =============================================================================
+
static inline void
updateLdStOffset(Instruction *ldst, int32_t offset, Function *fn)
{
@@ -3436,6 +3522,7 @@ Program::optimizeSSA(int level)
RUN_PASS(2, AlgebraicOpt, run);
RUN_PASS(2, ModifierFolding, run); // before load propagation -> less checks
RUN_PASS(1, ConstantFolding, foldAll);
+ RUN_PASS(2, LateAlgebraicOpt, run);
RUN_PASS(1, LoadPropagation, run);
RUN_PASS(1, IndirectPropagation, run);
RUN_PASS(2, MemoryOpt, run);