summary refs log tree commit diff stats
path: root/src/gallium/drivers/nouveau/codegen
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/nouveau/codegen')
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp 66
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 2
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp 2
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp 35
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 136
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h 10
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 109
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 1
8 files changed, 232 insertions, 129 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index 216e119..c126c08 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -1124,12 +1124,15 @@ CodeEmitterNV50::emitIMUL(const Instruction *i)
{
code[0] = 0x40000000;
+ if (i->src(1).getFile() == FILE_IMMEDIATE) {
+ if (i->sType == TYPE_S16)
+ code[0] |= 0x8100;
+ code[1] = 0;
+ emitForm_IMM(i);
+ } else
if (i->encSize == 8) {
code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
- if (i->src(1).getFile() == FILE_IMMEDIATE)
- emitForm_IMM(i);
- else
- emitForm_MAD(i);
+ emitForm_MAD(i);
} else {
if (i->sType == TYPE_S16)
code[0] |= 0x8100;
@@ -1190,29 +1193,45 @@ CodeEmitterNV50::emitDMUL(const Instruction *i)
void
CodeEmitterNV50::emitIMAD(const Instruction *i)
{
+ int mode;
code[0] = 0x60000000;
- if (isSignedType(i->sType))
- code[1] = i->saturate ? 0x40000000 : 0x20000000;
- else
- code[1] = 0x00000000;
- int neg1 = i->src(0).mod.neg() ^ i->src(1).mod.neg();
- int neg2 = i->src(2).mod.neg();
-
- assert(!(neg1 & neg2));
- code[1] |= neg1 << 27;
- code[1] |= neg2 << 26;
+ assert(!i->src(0).mod && !i->src(1).mod && !i->src(2).mod);
+ if (!isSignedType(i->sType))
+ mode = 0;
+ else if (i->saturate)
+ mode = 2;
+ else
+ mode = 1;
- if (i->src(1).getFile() == FILE_IMMEDIATE)
+ if (i->src(1).getFile() == FILE_IMMEDIATE) {
+ code[1] = 0;
emitForm_IMM(i);
- else
+ code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
+ if (i->flagsSrc >= 0) {
+ assert(!(code[0] & 0x10400000));
+ assert(SDATA(i->src(i->flagsSrc)).id == 0);
+ code[0] |= 0x10400000;
+ }
+ } else
+ if (i->encSize == 4) {
+ emitForm_MUL(i);
+ code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
+ if (i->flagsSrc >= 0) {
+ assert(!(code[0] & 0x10400000));
+ assert(SDATA(i->src(i->flagsSrc)).id == 0);
+ code[0] |= 0x10400000;
+ }
+ } else {
+ code[1] = mode << 29;
emitForm_MAD(i);
- if (i->flagsSrc >= 0) {
- // add with carry from $cX
- assert(!(code[1] & 0x0c000000) && !i->getPredicate());
- code[1] |= 0xc << 24;
- srcId(i->src(i->flagsSrc), 32 + 12);
+ if (i->flagsSrc >= 0) {
+ // add with carry from $cX
+ assert(!(code[1] & 0x0c000000) && !i->getPredicate());
+ code[1] |= 0xc << 24;
+ srcId(i->src(i->flagsSrc), 32 + 12);
+ }
}
}
@@ -2054,8 +2073,9 @@ CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
// check constraints on short MAD
if (info.srcNr >= 2 && i->srcExists(2)) {
- if (!i->defExists(0) || !isFloatType(i->dType) ||
- i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id)
+ if (!i->defExists(0) ||
+ (i->flagsSrc >= 0 && SDATA(i->src(i->flagsSrc)).id > 0) ||
+ DDATA(i->def(0)).id != SDATA(i->src(2)).id)
return 8;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 1d2caab..b233860 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -1897,7 +1897,7 @@ Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
shd = fetchSrc(C >> 4, C & 3);
if (texi->op == OP_TXD) {
- for (c = 0; c < tgt.getDim(); ++c) {
+ for (c = 0; c < tgt.getDim() + tgt.isCube(); ++c) {
texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c));
texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c));
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
index 420cc4e..0b90378 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
@@ -57,7 +57,7 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i)
Instruction *tex, *add;
Value *zero = bld.loadImm(bld.getSSA(), 0);
int l, c;
- const int dim = i->tex.target.getDim();
+ const int dim = i->tex.target.getDim() + i->tex.target.isCube();
const int array = i->tex.target.isArray();
i->op = OP_TEX; // no need to clone dPdx/dPdy later
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index 64f5fc0..8752b0c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -44,6 +44,8 @@ static bool
expandIntegerMUL(BuildUtil *bld, Instruction *mul)
{
const bool highResult = mul->subOp == NV50_IR_SUBOP_MUL_HIGH;
+ ImmediateValue src1;
+ bool src1imm = mul->src(1).getImmediate(src1);
DataType fTy; // full type
switch (mul->sType) {
@@ -72,24 +74,41 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul)
for (int j = 0; j < 4; ++j)
t[j] = bld->getSSA(fullSize);
- s[0] = mul->getSrc(0);
- s[1] = mul->getSrc(1);
-
if (isSignedType(mul->sType) && highResult) {
s[0] = bld->getSSA(fullSize);
s[1] = bld->getSSA(fullSize);
bld->mkOp1(OP_ABS, mul->sType, s[0], mul->getSrc(0));
bld->mkOp1(OP_ABS, mul->sType, s[1], mul->getSrc(1));
+ src1.reg.data.s32 = abs(src1.reg.data.s32);
+ } else {
+ s[0] = mul->getSrc(0);
+ s[1] = mul->getSrc(1);
}
// split sources into halves
i[0] = bld->mkSplit(a, halfSize, s[0]);
i[1] = bld->mkSplit(b, halfSize, s[1]);
- i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0], b[1]);
- i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]);
+ if (src1imm && (src1.reg.data.u32 & 0xffff0000) == 0) {
+ i[2] = i[3] = bld->mkOp2(OP_MUL, fTy, t[1], a[1],
+ bld->mkImm(src1.reg.data.u32 & 0xffff));
+ } else {
+ i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0],
+ src1imm ? bld->mkImm(src1.reg.data.u32 >> 16) : b[1]);
+ if (src1imm && (src1.reg.data.u32 & 0x0000ffff) == 0) {
+ i[3] = i[2];
+ t[1] = t[0];
+ } else {
+ i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]);
+ }
+ }
i[7] = bld->mkOp2(OP_SHL, fTy, t[2], t[1], bld->mkImm(halfSize * 8));
- i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]);
+ if (src1imm && (src1.reg.data.u32 & 0x0000ffff) == 0) {
+ i[4] = i[3];
+ t[3] = t[2];
+ } else {
+ i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]);
+ }
if (highResult) {
Value *c[2];
@@ -911,7 +930,7 @@ NV50LoweringPreSSA::handleTXD(TexInstruction *i)
Instruction *tex;
Value *zero = bld.loadImm(bld.getSSA(), 0);
int l, c;
- const int dim = i->tex.target.getDim();
+ const int dim = i->tex.target.getDim() + i->tex.target.isCube();
handleTEX(i);
i->op = OP_TEX; // no need to clone dPdx/dPdy later
@@ -1225,7 +1244,7 @@ NV50LoweringPreSSA::handleEXPORT(Instruction *i)
i->setDef(0, new_LValue(func, FILE_GPR));
i->getDef(0)->reg.data.id = id;
- prog->maxGPR = MAX2(prog->maxGPR, id);
+ prog->maxGPR = MAX2(prog->maxGPR, id * 2);
}
}
return true;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 0f575f2..e67bf3e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -186,92 +186,68 @@ NVC0LegalizePostRA::addTexUse(std::list<TexUse> &uses,
uses.push_back(TexUse(usei, texi));
}
+// While it might be tempting to use an algorithm that just looks at tex
+// uses, not all texture results are guaranteed to be used on all paths. In
+// the case where along some control flow path a texture result is never used,
+// we might reuse that register for something else, creating a
+// write-after-write hazard. So we have to manually look through all
+// instructions looking for ones that reference the registers in question.
void
-NVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi,
- Instruction *insn,
- const BasicBlock *term,
- std::list<TexUse> &uses)
+NVC0LegalizePostRA::findFirstUses(
+ Instruction *texi, std::list<TexUse> &uses)
{
- while (insn->op == OP_MOV && insn->getDef(0)->equals(insn->getSrc(0)))
- insn = insn->getSrc(0)->getUniqueInsn();
-
- // NOTE: the tex itself is, of course, not an overwriting definition
- if (insn == texi || !insn->bb->reachableBy(texi->bb, term))
- return;
+ int minGPR = texi->def(0).rep()->reg.data.id;
+ int maxGPR = minGPR + texi->def(0).rep()->reg.size / 4 - 1;
- switch (insn->op) {
- /* Values not connected to the tex's definition through any of these should
- * not be conflicting.
- */
- case OP_SPLIT:
- case OP_MERGE:
- case OP_PHI:
- case OP_UNION:
- /* recurse again */
- for (int s = 0; insn->srcExists(s); ++s)
- findOverwritingDefs(texi, insn->getSrc(s)->getUniqueInsn(), term,
- uses);
- break;
- default:
- // if (!isTextureOp(insn->op)) // TODO: are TEXes always ordered ?
- addTexUse(uses, insn, texi);
- break;
- }
+ unordered_set<const BasicBlock *> visited;
+ findFirstUsesBB(minGPR, maxGPR, texi->next, texi, uses, visited);
}
void
-NVC0LegalizePostRA::findFirstUses(
- const Instruction *texi,
- const Instruction *insn,
- std::list<TexUse> &uses,
- unordered_set<const Instruction *>& visited)
+NVC0LegalizePostRA::findFirstUsesBB(
+ int minGPR, int maxGPR, Instruction *start,
+ const Instruction *texi, std::list<TexUse> &uses,
+ unordered_set<const BasicBlock *> &visited)
{
- for (int d = 0; insn->defExists(d); ++d) {
- Value *v = insn->getDef(d);
- for (Value::UseIterator u = v->uses.begin(); u != v->uses.end(); ++u) {
- Instruction *usei = (*u)->getInsn();
-
- // NOTE: In case of a loop that overwrites a value but never uses
- // it, it can happen that we have a cycle of uses that consists only
- // of phis and no-op moves and will thus cause an infinite loop here
- // since these are not considered actual uses.
- // The most obvious (and perhaps the only) way to prevent this is to
- // remember which instructions we've already visited.
-
- if (visited.find(usei) != visited.end())
- continue;
+ const BasicBlock *bb = start->bb;
+
+ // We don't process the whole bb the first time around. This is correct,
+ // however we might be in a loop and hit this BB again, and need to process
+ // the full thing. So only mark a bb as visited if we processed it from the
+ // beginning.
+ if (start == bb->getEntry()) {
+ if (visited.find(bb) != visited.end())
+ return;
+ visited.insert(bb);
+ }
- visited.insert(usei);
-
- if (usei->op == OP_PHI || usei->op == OP_UNION) {
- // need a barrier before WAW cases, like:
- // %r0 = tex
- // if ...
- // texbar <- is required or tex might replace x again
- // %r1 = x <- overwriting def
- // %r2 = phi %r0, %r1
- for (int s = 0; usei->srcExists(s); ++s) {
- Instruction *defi = usei->getSrc(s)->getUniqueInsn();
- if (defi && &usei->src(s) != *u)
- findOverwritingDefs(texi, defi, usei->bb, uses);
- }
- }
+ for (Instruction *insn = start; insn != bb->getExit(); insn = insn->next) {
+ if (insn->isNop())
+ continue;
- if (usei->op == OP_SPLIT ||
- usei->op == OP_MERGE ||
- usei->op == OP_PHI ||
- usei->op == OP_UNION) {
- // these uses don't manifest in the machine code
- findFirstUses(texi, usei, uses, visited);
- } else
- if (usei->op == OP_MOV && usei->getDef(0)->equals(usei->getSrc(0)) &&
- usei->subOp != NV50_IR_SUBOP_MOV_FINAL) {
- findFirstUses(texi, usei, uses, visited);
- } else {
- addTexUse(uses, usei, texi);
- }
+ for (int d = 0; insn->defExists(d); ++d) {
+ if (insn->def(d).getFile() != FILE_GPR ||
+ insn->def(d).rep()->reg.data.id < minGPR ||
+ insn->def(d).rep()->reg.data.id > maxGPR)
+ continue;
+ addTexUse(uses, insn, texi);
+ return;
+ }
+
+ for (int s = 0; insn->srcExists(s); ++s) {
+ if (insn->src(s).getFile() != FILE_GPR ||
+ insn->src(s).rep()->reg.data.id < minGPR ||
+ insn->src(s).rep()->reg.data.id > maxGPR)
+ continue;
+ addTexUse(uses, insn, texi);
+ return;
}
}
+
+ for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
+ findFirstUsesBB(minGPR, maxGPR, BasicBlock::get(ei.getNode())->getEntry(),
+ texi, uses, visited);
+ }
}
// Texture barriers:
@@ -323,8 +299,7 @@ NVC0LegalizePostRA::insertTextureBarriers(Function *fn)
if (!uses)
return false;
for (size_t i = 0; i < texes.size(); ++i) {
- unordered_set<const Instruction *> visited;
- findFirstUses(texes[i], texes[i], uses[i], visited);
+ findFirstUses(texes[i], uses[i]);
}
// determine the barrier level at each use
@@ -870,7 +845,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
Instruction *tex;
Value *zero = bld.loadImm(bld.getSSA(), 0);
int l, c;
- const int dim = i->tex.target.getDim();
+ const int dim = i->tex.target.getDim() + i->tex.target.isCube();
const int array = i->tex.target.isArray();
i->op = OP_TEX; // no need to clone dPdx/dPdy later
@@ -917,7 +892,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
bool
NVC0LoweringPass::handleTXD(TexInstruction *txd)
{
- int dim = txd->tex.target.getDim();
+ int dim = txd->tex.target.getDim() + txd->tex.target.isCube();
unsigned arg = txd->tex.target.getArgCount();
unsigned expected_args = arg;
const int chipset = prog->getTarget()->getChipset();
@@ -937,8 +912,7 @@ NVC0LoweringPass::handleTXD(TexInstruction *txd)
if (expected_args > 4 ||
dim > 2 ||
- txd->tex.target.isShadow() ||
- txd->tex.target.isCube())
+ txd->tex.target.isShadow())
txd->op = OP_TEX;
handleTEX(txd);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 2ce52e5..adb400a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -69,12 +69,10 @@ private:
};
bool insertTextureBarriers(Function *);
inline bool insnDominatedBy(const Instruction *, const Instruction *) const;
- void findFirstUses(const Instruction *tex, const Instruction *def,
- std::list<TexUse>&,
- unordered_set<const Instruction *>&);
- void findOverwritingDefs(const Instruction *tex, Instruction *insn,
- const BasicBlock *term,
- std::list<TexUse>&);
+ void findFirstUses(Instruction *texi, std::list<TexUse> &uses);
+ void findFirstUsesBB(int minGPR, int maxGPR, Instruction *start,
+ const Instruction *texi, std::list<TexUse> &uses,
+ unordered_set<const BasicBlock *> &visited);
void addTexUse(std::list<TexUse>&, Instruction *, const Instruction *);
const Instruction *recurseDef(const Instruction *);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 805be5f..022626c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1501,6 +1501,7 @@ private:
void handleSLCT(Instruction *);
void handleLOGOP(Instruction *);
void handleCVT_NEG(Instruction *);
+ void handleCVT_CVT(Instruction *);
void handleCVT_EXTBF(Instruction *);
void handleSUCLAMP(Instruction *);
@@ -1792,6 +1793,47 @@ AlgebraicOpt::handleCVT_NEG(Instruction *cvt)
delete_Instruction(prog, cvt);
}
+// F2I(TRUNC()) and so on can be expressed as a single CVT. If the earlier CVT
+// does a type conversion, this becomes trickier as there might be range
+// changes/etc. We could handle those in theory as long as the range was being
+// reduced or kept the same.
+void
+AlgebraicOpt::handleCVT_CVT(Instruction *cvt)
+{
+ Instruction *insn = cvt->getSrc(0)->getInsn();
+ RoundMode rnd = insn->rnd;
+
+ if (insn->saturate ||
+ insn->subOp ||
+ insn->dType != insn->sType ||
+ insn->dType != cvt->sType)
+ return;
+
+ switch (insn->op) {
+ case OP_CEIL:
+ rnd = ROUND_PI;
+ break;
+ case OP_FLOOR:
+ rnd = ROUND_MI;
+ break;
+ case OP_TRUNC:
+ rnd = ROUND_ZI;
+ break;
+ case OP_CVT:
+ break;
+ default:
+ return;
+ }
+
+ if (!isFloatType(cvt->dType) || !isFloatType(insn->sType))
+ rnd = (RoundMode)(rnd & 3);
+
+ cvt->rnd = rnd;
+ cvt->setSrc(0, insn->getSrc(0));
+ cvt->src(0).mod *= insn->src(0).mod;
+ cvt->sType = insn->sType;
+}
+
// Some shaders extract packed bytes out of words and convert them to
// e.g. float. The Fermi+ CVT instruction can extract those directly, as can
// nv50 for word sizes.
@@ -1961,6 +2003,7 @@ AlgebraicOpt::visit(BasicBlock *bb)
break;
case OP_CVT:
handleCVT_NEG(i);
+ handleCVT_CVT(i);
if (prog->getTarget()->isOpSupported(OP_EXTBF, TYPE_U32))
handleCVT_EXTBF(i);
break;
@@ -2532,6 +2575,7 @@ MemoryOpt::runOpt(BasicBlock *bb)
class FlatteningPass : public Pass
{
private:
+ virtual bool visit(Function *);
virtual bool visit(BasicBlock *);
bool tryPredicateConditional(BasicBlock *);
@@ -2540,6 +2584,8 @@ private:
inline bool isConstantCondition(Value *pred);
inline bool mayPredicate(const Instruction *, const Value *pred) const;
inline void removeFlow(Instruction *);
+
+ uint8_t gpr_unit;
};
bool
@@ -2561,9 +2607,15 @@ FlatteningPass::isConstantCondition(Value *pred)
file = ld->src(0).getFile();
} else {
file = insn->src(s).getFile();
- // catch $r63 on NVC0
- if (file == FILE_GPR && insn->getSrc(s)->reg.data.id > prog->maxGPR)
- file = FILE_IMMEDIATE;
+ // catch $r63 on NVC0 and $r63/$r127 on NV50. Unfortunately maxGPR is
+ // in register "units", which can vary between targets.
+ if (file == FILE_GPR) {
+ Value *v = insn->getSrc(s);
+ int bytes = v->reg.data.id * MIN2(v->reg.size, 4);
+ int units = bytes >> gpr_unit;
+ if (units > prog->maxGPR)
+ file = FILE_IMMEDIATE;
+ }
}
if (file != FILE_IMMEDIATE && file != FILE_MEMORY_CONST)
return false;
@@ -2669,6 +2721,14 @@ FlatteningPass::tryPropagateBranch(BasicBlock *bb)
}
bool
+FlatteningPass::visit(Function *fn)
+{
+ gpr_unit = prog->getTarget()->getFileUnit(FILE_GPR);
+
+ return true;
+}
+
+bool
FlatteningPass::visit(BasicBlock *bb)
{
if (tryPredicateConditional(bb))
@@ -2774,6 +2834,15 @@ private:
virtual bool visit(BasicBlock *);
};
+static bool
+post_ra_dead(Instruction *i)
+{
+ for (int d = 0; i->defExists(d); ++d)
+ if (i->getDef(d)->refCount())
+ return false;
+ return true;
+}
+
bool
NV50PostRaConstantFolding::visit(BasicBlock *bb)
{
@@ -2787,24 +2856,48 @@ NV50PostRaConstantFolding::visit(BasicBlock *bb)
i->src(0).getFile() != FILE_GPR ||
i->src(1).getFile() != FILE_GPR ||
i->src(2).getFile() != FILE_GPR ||
- i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id ||
- !isFloatType(i->dType))
+ i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id)
break;
if (i->getDef(0)->reg.data.id >= 64 ||
i->getSrc(0)->reg.data.id >= 64)
break;
+ if (i->flagsSrc >= 0 && i->getSrc(i->flagsSrc)->reg.data.id != 0)
+ break;
+
+ if (i->getPredicate())
+ break;
+
def = i->getSrc(1)->getInsn();
+ if (def && def->op == OP_SPLIT && typeSizeof(def->sType) == 4)
+ def = def->getSrc(0)->getInsn();
if (def && def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) {
vtmp = i->getSrc(1);
- i->setSrc(1, def->getSrc(0));
+ if (isFloatType(i->sType)) {
+ i->setSrc(1, def->getSrc(0));
+ } else {
+ ImmediateValue val;
+ bool ret = def->src(0).getImmediate(val);
+ assert(ret);
+ if (i->getSrc(1)->reg.data.id & 1)
+ val.reg.data.u32 >>= 16;
+ val.reg.data.u32 &= 0xffff;
+ i->setSrc(1, new_ImmediateValue(bb->getProgram(), val.reg.data.u32));
+ }
/* There's no post-RA dead code elimination, so do it here
* XXX: if we add more code-removing post-RA passes, we might
* want to create a post-RA dead-code elim pass */
- if (vtmp->refCount() == 0)
- delete_Instruction(bb->getProgram(), def);
+ if (post_ra_dead(vtmp->getInsn())) {
+ Value *src = vtmp->getInsn()->getSrc(0);
+ // Careful -- splits will have already been removed from the
+ // functions. Don't double-delete.
+ if (vtmp->getInsn()->bb)
+ delete_Instruction(prog, vtmp->getInsn());
+ if (src->getInsn() && post_ra_dead(src->getInsn()))
+ delete_Instruction(prog, src->getInsn());
+ }
break;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index b32bc13..cd8c42c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -1473,7 +1473,6 @@ GCRA::allocateRegisters(ArrayList& insns)
// Short encoding only possible if they're all GPRs, no need to
// affect them otherwise.
if (insn->flagsDef < 0 &&
- isFloatType(insn->dType) &&
insn->src(0).getFile() == FILE_GPR &&
insn->src(1).getFile() == FILE_GPR &&
insn->src(2).getFile() == FILE_GPR)