summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp81
1 files changed, 55 insertions, 26 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 82e8148..72dd31e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -278,7 +278,6 @@ private:
void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&);
- // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET
CmpInstruction *findOriginForTestWithZero(Value *);
unsigned int foldCount;
@@ -337,25 +336,33 @@ ConstantFolding::findOriginForTestWithZero(Value *value)
return NULL;
Instruction *insn = value->getInsn();
- while (insn && insn->op != OP_SET) {
- Instruction *next = NULL;
- switch (insn->op) {
- case OP_NEG:
- case OP_ABS:
- case OP_CVT:
- next = insn->getSrc(0)->getInsn();
- if (insn->sType != next->dType)
+ if (insn->asCmp() && insn->op != OP_SLCT)
+ return insn->asCmp();
+
+ /* Sometimes mov's will sneak in as a result of other folding. This gets
+ * cleaned up later.
+ */
+ if (insn->op == OP_MOV)
+ return findOriginForTestWithZero(insn->getSrc(0));
+
+ /* Deal with AND 1.0 here since nv50 can't fold into boolean float */
+ if (insn->op == OP_AND) {
+ int s = 0;
+ ImmediateValue imm;
+ if (!insn->src(s).getImmediate(imm)) {
+ s = 1;
+ if (!insn->src(s).getImmediate(imm))
return NULL;
- break;
- case OP_MOV:
- next = insn->getSrc(0)->getInsn();
- break;
- default:
- return NULL;
}
- insn = next;
+ if (imm.reg.data.f32 != 1.0f)
+ return NULL;
+ /* TODO: Come up with a way to handle the condition being inverted */
+ if (insn->src(!s).mod != Modifier(0))
+ return NULL;
+ return findOriginForTestWithZero(insn->getSrc(!s));
}
- return insn ? insn->asCmp() : NULL;
+
+ return NULL;
}
void
@@ -946,29 +953,51 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case OP_SET: // TODO: SET_AND,OR,XOR
{
+ /* This optimizes the case where the output of a set is being compared
+ * to zero. Since the set can only produce 0/-1 (int) or 0/1 (float), we
+ * can be a lot cleverer in our comparison.
+ */
CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
CondCode cc, ccZ;
- if (i->src(t).mod != Modifier(0))
- return;
- if (imm0.reg.data.u32 != 0 || !si || si->op != OP_SET)
+ if (imm0.reg.data.u32 != 0 || !si)
return;
cc = si->setCond;
ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
+ // We do everything assuming var (cmp) 0, reverse the condition if 0 is
+ // first.
if (s == 0)
ccZ = reverseCondCode(ccZ);
+ // If there is a negative modifier, we need to undo that, by flipping
+ // the comparison to zero.
+ if (i->src(t).mod.neg())
+ ccZ = reverseCondCode(ccZ);
+ // If this is a signed comparison, we expect the input to be a regular
+ // boolean, i.e. 0/-1. However the rest of the logic assumes that true
+ // is positive, so just flip the sign.
+ if (i->sType == TYPE_S32) {
+ assert(!isFloatType(si->dType));
+ ccZ = reverseCondCode(ccZ);
+ }
switch (ccZ) {
- case CC_LT: cc = CC_FL; break;
- case CC_GE: cc = CC_TR; break;
- case CC_EQ: cc = inverseCondCode(cc); break;
- case CC_LE: cc = inverseCondCode(cc); break;
- case CC_GT: break;
- case CC_NE: break;
+ case CC_LT: cc = CC_FL; break; // bool < 0 -- this is never true
+ case CC_GE: cc = CC_TR; break; // bool >= 0 -- this is always true
+ case CC_EQ: cc = inverseCondCode(cc); break; // bool == 0 -- !bool
+ case CC_LE: cc = inverseCondCode(cc); break; // bool <= 0 -- !bool
+ case CC_GT: break; // bool > 0 -- bool
+ case CC_NE: break; // bool != 0 -- bool
default:
return;
}
+
+ // Update the condition of this SET to be identical to the origin set,
+ // but with the updated condition code. The original SET should get
+ // DCE'd, ideally.
+ i->op = si->op;
i->asCmp()->setCond = cc;
i->setSrc(0, si->src(0));
i->setSrc(1, si->src(1));
+ if (si->srcExists(2))
+ i->setSrc(2, si->src(2));
i->sType = si->sType;
}
break;