diff options
Diffstat (limited to 'src/gallium/drivers/nouveau')
31 files changed, 388 insertions, 206 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp index 216e119..c126c08 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp @@ -1124,12 +1124,15 @@ CodeEmitterNV50::emitIMUL(const Instruction *i) { code[0] = 0x40000000; + if (i->src(1).getFile() == FILE_IMMEDIATE) { + if (i->sType == TYPE_S16) + code[0] |= 0x8100; + code[1] = 0; + emitForm_IMM(i); + } else if (i->encSize == 8) { code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000; - if (i->src(1).getFile() == FILE_IMMEDIATE) - emitForm_IMM(i); - else - emitForm_MAD(i); + emitForm_MAD(i); } else { if (i->sType == TYPE_S16) code[0] |= 0x8100; @@ -1190,29 +1193,45 @@ CodeEmitterNV50::emitDMUL(const Instruction *i) void CodeEmitterNV50::emitIMAD(const Instruction *i) { + int mode; code[0] = 0x60000000; - if (isSignedType(i->sType)) - code[1] = i->saturate ? 0x40000000 : 0x20000000; - else - code[1] = 0x00000000; - int neg1 = i->src(0).mod.neg() ^ i->src(1).mod.neg(); - int neg2 = i->src(2).mod.neg(); - - assert(!(neg1 & neg2)); - code[1] |= neg1 << 27; - code[1] |= neg2 << 26; + assert(!i->src(0).mod && !i->src(1).mod && !i->src(2).mod); + if (!isSignedType(i->sType)) + mode = 0; + else if (i->saturate) + mode = 2; + else + mode = 1; - if (i->src(1).getFile() == FILE_IMMEDIATE) + if (i->src(1).getFile() == FILE_IMMEDIATE) { + code[1] = 0; emitForm_IMM(i); - else + code[0] |= (mode & 1) << 8 | (mode & 2) << 14; + if (i->flagsSrc >= 0) { + assert(!(code[0] & 0x10400000)); + assert(SDATA(i->src(i->flagsSrc)).id == 0); + code[0] |= 0x10400000; + } + } else + if (i->encSize == 4) { + emitForm_MUL(i); + code[0] |= (mode & 1) << 8 | (mode & 2) << 14; + if (i->flagsSrc >= 0) { + assert(!(code[0] & 0x10400000)); + assert(SDATA(i->src(i->flagsSrc)).id == 0); + code[0] |= 0x10400000; + } + } else { + code[1] = mode << 29; emitForm_MAD(i); - if (i->flagsSrc >= 0) { - // add with carry from $cX - assert(!(code[1] & 0x0c000000) && !i->getPredicate()); - code[1] |= 0xc << 24; - srcId(i->src(i->flagsSrc), 32 + 12); + if (i->flagsSrc >= 0) { + // add with carry from $cX + assert(!(code[1] & 0x0c000000) && !i->getPredicate()); + code[1] |= 0xc << 24; + srcId(i->src(i->flagsSrc), 32 + 12); + } } } @@ -2054,8 +2073,9 @@ CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const // check constraints on short MAD if (info.srcNr >= 2 && i->srcExists(2)) { - if (!i->defExists(0) || !isFloatType(i->dType) || - i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id) + if (!i->defExists(0) || + (i->flagsSrc >= 0 && SDATA(i->src(i->flagsSrc)).id > 0) || + DDATA(i->def(0)).id != SDATA(i->src(2)).id) return 8; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 1d2caab..b233860 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -1897,7 +1897,7 @@ Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy) shd = fetchSrc(C >> 4, C & 3); if (texi->op == OP_TXD) { - for (c = 0; c < tgt.getDim(); ++c) { + for (c = 0; c < tgt.getDim() + tgt.isCube(); ++c) { texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c)); texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c)); } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp index 420cc4e..0b90378 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp @@ -57,7 +57,7 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i) Instruction *tex, *add; Value *zero = bld.loadImm(bld.getSSA(), 0); int l, c; - const int dim = i->tex.target.getDim(); + const int dim = i->tex.target.getDim() + i->tex.target.isCube(); const int array = i->tex.target.isArray(); i->op = OP_TEX; // no need to clone dPdx/dPdy later diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp index 64f5fc0..8752b0c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp @@ -44,6 +44,8 @@ static bool expandIntegerMUL(BuildUtil *bld, Instruction *mul) { const bool highResult = mul->subOp == NV50_IR_SUBOP_MUL_HIGH; + ImmediateValue src1; + bool src1imm = mul->src(1).getImmediate(src1); DataType fTy; // full type switch (mul->sType) { @@ -72,24 +74,41 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul) for (int j = 0; j < 4; ++j) t[j] = bld->getSSA(fullSize); - s[0] = mul->getSrc(0); - s[1] = mul->getSrc(1); - if (isSignedType(mul->sType) && highResult) { s[0] = bld->getSSA(fullSize); s[1] = bld->getSSA(fullSize); bld->mkOp1(OP_ABS, mul->sType, s[0], mul->getSrc(0)); bld->mkOp1(OP_ABS, mul->sType, s[1], mul->getSrc(1)); + src1.reg.data.s32 = abs(src1.reg.data.s32); + } else { + s[0] = mul->getSrc(0); + s[1] = mul->getSrc(1); } // split sources into halves i[0] = bld->mkSplit(a, halfSize, s[0]); i[1] = bld->mkSplit(b, halfSize, s[1]); - i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0], b[1]); - i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]); + if (src1imm && (src1.reg.data.u32 & 0xffff0000) == 0) { + i[2] = i[3] = bld->mkOp2(OP_MUL, fTy, t[1], a[1], + bld->mkImm(src1.reg.data.u32 & 0xffff)); + } else { + i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0], + src1imm ? bld->mkImm(src1.reg.data.u32 >> 16) : b[1]); + if (src1imm && (src1.reg.data.u32 & 0x0000ffff) == 0) { + i[3] = i[2]; + t[1] = t[0]; + } else { + i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]); + } + } i[7] = bld->mkOp2(OP_SHL, fTy, t[2], t[1], bld->mkImm(halfSize * 8)); - i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]); + if (src1imm && (src1.reg.data.u32 & 0x0000ffff) == 0) { + i[4] = i[3]; + t[3] = t[2]; + } else { + i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]); + } if (highResult) { Value *c[2]; @@ -911,7 +930,7 @@ NV50LoweringPreSSA::handleTXD(TexInstruction *i) Instruction *tex; Value *zero = bld.loadImm(bld.getSSA(), 0); int l, c; - const int dim = i->tex.target.getDim(); + const int dim = i->tex.target.getDim() + i->tex.target.isCube(); handleTEX(i); i->op = OP_TEX; // no need to clone dPdx/dPdy later @@ -1225,7 +1244,7 @@ NV50LoweringPreSSA::handleEXPORT(Instruction *i) i->setDef(0, new_LValue(func, FILE_GPR)); i->getDef(0)->reg.data.id = id; - prog->maxGPR = MAX2(prog->maxGPR, id); + prog->maxGPR = MAX2(prog->maxGPR, id * 2); } } return true; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 0f575f2..e67bf3e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -186,92 +186,68 @@ NVC0LegalizePostRA::addTexUse(std::list<TexUse> &uses, uses.push_back(TexUse(usei, texi)); } +// While it might be tempting to use the an algorithm that just looks at tex +// uses, not all texture results are guaranteed to be used on all paths. In +// the case where along some control flow path a texture result is never used, +// we might reuse that register for something else, creating a +// write-after-write hazard. So we have to manually look through all +// instructions looking for ones that reference the registers in question. void -NVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi, - Instruction *insn, - const BasicBlock *term, - std::list<TexUse> &uses) +NVC0LegalizePostRA::findFirstUses( + Instruction *texi, std::list<TexUse> &uses) { - while (insn->op == OP_MOV && insn->getDef(0)->equals(insn->getSrc(0))) - insn = insn->getSrc(0)->getUniqueInsn(); - - // NOTE: the tex itself is, of course, not an overwriting definition - if (insn == texi || !insn->bb->reachableBy(texi->bb, term)) - return; + int minGPR = texi->def(0).rep()->reg.data.id; + int maxGPR = minGPR + texi->def(0).rep()->reg.size / 4 - 1; - switch (insn->op) { - /* Values not connected to the tex's definition through any of these should - * not be conflicting. - */ - case OP_SPLIT: - case OP_MERGE: - case OP_PHI: - case OP_UNION: - /* recurse again */ - for (int s = 0; insn->srcExists(s); ++s) - findOverwritingDefs(texi, insn->getSrc(s)->getUniqueInsn(), term, - uses); - break; - default: - // if (!isTextureOp(insn->op)) // TODO: are TEXes always ordered ? - addTexUse(uses, insn, texi); - break; - } + unordered_set<const BasicBlock *> visited; + findFirstUsesBB(minGPR, maxGPR, texi->next, texi, uses, visited); } void -NVC0LegalizePostRA::findFirstUses( - const Instruction *texi, - const Instruction *insn, - std::list<TexUse> &uses, - unordered_set<const Instruction *>& visited) +NVC0LegalizePostRA::findFirstUsesBB( + int minGPR, int maxGPR, Instruction *start, + const Instruction *texi, std::list<TexUse> &uses, + unordered_set<const BasicBlock *> &visited) { - for (int d = 0; insn->defExists(d); ++d) { - Value *v = insn->getDef(d); - for (Value::UseIterator u = v->uses.begin(); u != v->uses.end(); ++u) { - Instruction *usei = (*u)->getInsn(); - - // NOTE: In case of a loop that overwrites a value but never uses - // it, it can happen that we have a cycle of uses that consists only - // of phis and no-op moves and will thus cause an infinite loop here - // since these are not considered actual uses. - // The most obvious (and perhaps the only) way to prevent this is to - // remember which instructions we've already visited. - - if (visited.find(usei) != visited.end()) - continue; + const BasicBlock *bb = start->bb; + + // We don't process the whole bb the first time around. This is correct, + // however we might be in a loop and hit this BB again, and need to process + // the full thing. So only mark a bb as visited if we processed it from the + // beginning. + if (start == bb->getEntry()) { + if (visited.find(bb) != visited.end()) + return; + visited.insert(bb); + } - visited.insert(usei); - - if (usei->op == OP_PHI || usei->op == OP_UNION) { - // need a barrier before WAW cases, like: - // %r0 = tex - // if ... - // texbar <- is required or tex might replace x again - // %r1 = x <- overwriting def - // %r2 = phi %r0, %r1 - for (int s = 0; usei->srcExists(s); ++s) { - Instruction *defi = usei->getSrc(s)->getUniqueInsn(); - if (defi && &usei->src(s) != *u) - findOverwritingDefs(texi, defi, usei->bb, uses); - } - } + for (Instruction *insn = start; insn != bb->getExit(); insn = insn->next) { + if (insn->isNop()) + continue; - if (usei->op == OP_SPLIT || - usei->op == OP_MERGE || - usei->op == OP_PHI || - usei->op == OP_UNION) { - // these uses don't manifest in the machine code - findFirstUses(texi, usei, uses, visited); - } else - if (usei->op == OP_MOV && usei->getDef(0)->equals(usei->getSrc(0)) && - usei->subOp != NV50_IR_SUBOP_MOV_FINAL) { - findFirstUses(texi, usei, uses, visited); - } else { - addTexUse(uses, usei, texi); - } + for (int d = 0; insn->defExists(d); ++d) { + if (insn->def(d).getFile() != FILE_GPR || + insn->def(d).rep()->reg.data.id < minGPR || + insn->def(d).rep()->reg.data.id > maxGPR) + continue; + addTexUse(uses, insn, texi); + return; + } + + for (int s = 0; insn->srcExists(s); ++s) { + if (insn->src(s).getFile() != FILE_GPR || + insn->src(s).rep()->reg.data.id < minGPR || + insn->src(s).rep()->reg.data.id > maxGPR) + continue; + addTexUse(uses, insn, texi); + return; } } + + for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { + findFirstUsesBB(minGPR, maxGPR, BasicBlock::get(ei.getNode())->getEntry(), + texi, uses, visited); + } } // Texture barriers: @@ -323,8 +299,7 @@ NVC0LegalizePostRA::insertTextureBarriers(Function *fn) if (!uses) return false; for (size_t i = 0; i < texes.size(); ++i) { - unordered_set<const Instruction *> visited; - findFirstUses(texes[i], texes[i], uses[i], visited); + findFirstUses(texes[i], uses[i]); } // determine the barrier level at each use @@ -870,7 +845,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i) Instruction *tex; Value *zero = bld.loadImm(bld.getSSA(), 0); int l, c; - const int dim = i->tex.target.getDim(); + const int dim = i->tex.target.getDim() + i->tex.target.isCube(); const int array = i->tex.target.isArray(); i->op = OP_TEX; // no need to clone dPdx/dPdy later @@ -917,7 +892,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i) bool NVC0LoweringPass::handleTXD(TexInstruction *txd) { - int dim = txd->tex.target.getDim(); + int dim = txd->tex.target.getDim() + txd->tex.target.isCube(); unsigned arg = txd->tex.target.getArgCount(); unsigned expected_args = arg; const int chipset = prog->getTarget()->getChipset(); @@ -937,8 +912,7 @@ NVC0LoweringPass::handleTXD(TexInstruction *txd) if (expected_args > 4 || dim > 2 || - txd->tex.target.isShadow() || - txd->tex.target.isCube()) + txd->tex.target.isShadow()) txd->op = OP_TEX; handleTEX(txd); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index 2ce52e5..adb400a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -69,12 +69,10 @@ private: }; bool insertTextureBarriers(Function *); inline bool insnDominatedBy(const Instruction *, const Instruction *) const; - void findFirstUses(const Instruction *tex, const Instruction *def, - std::list<TexUse>&, - unordered_set<const Instruction *>&); - void findOverwritingDefs(const Instruction *tex, Instruction *insn, - const BasicBlock *term, - std::list<TexUse>&); + void findFirstUses(Instruction *texi, std::list<TexUse> &uses); + void findFirstUsesBB(int minGPR, int maxGPR, Instruction *start, + const Instruction *texi, std::list<TexUse> &uses, + unordered_set<const BasicBlock *> &visited); void addTexUse(std::list<TexUse>&, Instruction *, const Instruction *); const Instruction *recurseDef(const Instruction *); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 805be5f..022626c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -1501,6 +1501,7 @@ private: void handleSLCT(Instruction *); void handleLOGOP(Instruction *); void handleCVT_NEG(Instruction *); + void handleCVT_CVT(Instruction *); void handleCVT_EXTBF(Instruction *); void handleSUCLAMP(Instruction *); @@ -1792,6 +1793,47 @@ AlgebraicOpt::handleCVT_NEG(Instruction *cvt) delete_Instruction(prog, cvt); } +// F2I(TRUNC()) and so on can be expressed as a single CVT. If the earlier CVT +// does a type conversion, this becomes trickier as there might be range +// changes/etc. We could handle those in theory as long as the range was being +// reduced or kept the same. +void +AlgebraicOpt::handleCVT_CVT(Instruction *cvt) +{ + Instruction *insn = cvt->getSrc(0)->getInsn(); + RoundMode rnd = insn->rnd; + + if (insn->saturate || + insn->subOp || + insn->dType != insn->sType || + insn->dType != cvt->sType) + return; + + switch (insn->op) { + case OP_CEIL: + rnd = ROUND_PI; + break; + case OP_FLOOR: + rnd = ROUND_MI; + break; + case OP_TRUNC: + rnd = ROUND_ZI; + break; + case OP_CVT: + break; + default: + return; + } + + if (!isFloatType(cvt->dType) || !isFloatType(insn->sType)) + rnd = (RoundMode)(rnd & 3); + + cvt->rnd = rnd; + cvt->setSrc(0, insn->getSrc(0)); + cvt->src(0).mod *= insn->src(0).mod; + cvt->sType = insn->sType; +} + // Some shaders extract packed bytes out of words and convert them to // e.g. float. The Fermi+ CVT instruction can extract those directly, as can // nv50 for word sizes. @@ -1961,6 +2003,7 @@ AlgebraicOpt::visit(BasicBlock *bb) break; case OP_CVT: handleCVT_NEG(i); + handleCVT_CVT(i); if (prog->getTarget()->isOpSupported(OP_EXTBF, TYPE_U32)) handleCVT_EXTBF(i); break; @@ -2532,6 +2575,7 @@ MemoryOpt::runOpt(BasicBlock *bb) class FlatteningPass : public Pass { private: + virtual bool visit(Function *); virtual bool visit(BasicBlock *); bool tryPredicateConditional(BasicBlock *); @@ -2540,6 +2584,8 @@ private: inline bool isConstantCondition(Value *pred); inline bool mayPredicate(const Instruction *, const Value *pred) const; inline void removeFlow(Instruction *); + + uint8_t gpr_unit; }; bool @@ -2561,9 +2607,15 @@ FlatteningPass::isConstantCondition(Value *pred) file = ld->src(0).getFile(); } else { file = insn->src(s).getFile(); - // catch $r63 on NVC0 - if (file == FILE_GPR && insn->getSrc(s)->reg.data.id > prog->maxGPR) - file = FILE_IMMEDIATE; + // catch $r63 on NVC0 and $r63/$r127 on NV50. Unfortunately maxGPR is + // in register "units", which can vary between targets. + if (file == FILE_GPR) { + Value *v = insn->getSrc(s); + int bytes = v->reg.data.id * MIN2(v->reg.size, 4); + int units = bytes >> gpr_unit; + if (units > prog->maxGPR) + file = FILE_IMMEDIATE; + } } if (file != FILE_IMMEDIATE && file != FILE_MEMORY_CONST) return false; @@ -2669,6 +2721,14 @@ FlatteningPass::tryPropagateBranch(BasicBlock *bb) } bool +FlatteningPass::visit(Function *fn) +{ + gpr_unit = prog->getTarget()->getFileUnit(FILE_GPR); + + return true; +} + +bool FlatteningPass::visit(BasicBlock *bb) { if (tryPredicateConditional(bb)) @@ -2774,6 +2834,15 @@ private: virtual bool visit(BasicBlock *); }; +static bool +post_ra_dead(Instruction *i) +{ + for (int d = 0; i->defExists(d); ++d) + if (i->getDef(d)->refCount()) + return false; + return true; +} + bool NV50PostRaConstantFolding::visit(BasicBlock *bb) { @@ -2787,24 +2856,48 @@ NV50PostRaConstantFolding::visit(BasicBlock *bb) i->src(0).getFile() != FILE_GPR || i->src(1).getFile() != FILE_GPR || i->src(2).getFile() != FILE_GPR || - i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id || - !isFloatType(i->dType)) + i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id) break; if (i->getDef(0)->reg.data.id >= 64 || i->getSrc(0)->reg.data.id >= 64) break; + if (i->flagsSrc >= 0 && i->getSrc(i->flagsSrc)->reg.data.id != 0) + break; + + if (i->getPredicate()) + break; + def = i->getSrc(1)->getInsn(); + if (def && def->op == OP_SPLIT && typeSizeof(def->sType) == 4) + def = def->getSrc(0)->getInsn(); if (def && def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) { vtmp = i->getSrc(1); - i->setSrc(1, def->getSrc(0)); + if (isFloatType(i->sType)) { + i->setSrc(1, def->getSrc(0)); + } else { + ImmediateValue val; + bool ret = def->src(0).getImmediate(val); + assert(ret); + if (i->getSrc(1)->reg.data.id & 1) + val.reg.data.u32 >>= 16; + val.reg.data.u32 &= 0xffff; + i->setSrc(1, new_ImmediateValue(bb->getProgram(), val.reg.data.u32)); + } /* There's no post-RA dead code elimination, so do it here * XXX: if we add more code-removing post-RA passes, we might * want to create a post-RA dead-code elim pass */ - if (vtmp->refCount() == 0) - delete_Instruction(bb->getProgram(), def); + if (post_ra_dead(vtmp->getInsn())) { + Value *src = vtmp->getInsn()->getSrc(0); + // Careful -- splits will have already been removed from the + // functions. Don't double-delete. + if (vtmp->getInsn()->bb) + delete_Instruction(prog, vtmp->getInsn()); + if (src->getInsn() && post_ra_dead(src->getInsn())) + delete_Instruction(prog, src->getInsn()); + } break; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index b32bc13..cd8c42c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -1473,7 +1473,6 @@ GCRA::allocateRegisters(ArrayList& insns) // Short encoding only possible if they're all GPRs, no need to // affect them otherwise. if (insn->flagsDef < 0 && - isFloatType(insn->dType) && insn->src(0).getFile() == FILE_GPR && insn->src(1).getFile() == FILE_GPR && insn->src(2).getFile() == FILE_GPR) diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index a6065e4..4ca9e5c 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -147,6 +147,12 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) if (nv_dbg) nouveau_mesa_debug = atoi(nv_dbg); + /* These must be set before any failure is possible, as the cleanup + * paths assume they're responsible for deleting them. + */ + screen->drm = nouveau_drm(&dev->object); + screen->device = dev; + /* * this is initialized to 1 in nouveau_drm_screen_create after screen * is fully constructed and added to the global screen list. @@ -175,7 +181,6 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) data, size, &screen->channel); if (ret) return ret; - screen->device = dev; ret = nouveau_client_new(screen->device, &screen->client); if (ret) @@ -229,6 +234,8 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) void nouveau_screen_fini(struct nouveau_screen *screen) { + int fd = screen->drm->fd; + nouveau_mm_destroy(screen->mm_GART); nouveau_mm_destroy(screen->mm_VRAM); @@ -238,6 +245,8 @@ nouveau_screen_fini(struct nouveau_screen *screen) nouveau_object_del(&screen->channel); nouveau_device_del(&screen->device); + nouveau_drm_del(&screen->drm); + close(fd); } static void diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index 328646f..28c4760 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -17,6 +17,7 @@ struct nouveau_bo; struct nouveau_screen { struct pipe_screen base; + struct nouveau_drm *drm; struct nouveau_device *device; struct nouveau_object *channel; struct nouveau_client *client; diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index 1319c32..f13988e 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -6,6 +6,7 @@ #include "pipe/p_defines.h" +#include <drm.h> #include <nouveau.h> #ifndef NV04_PFIFO_MAX_PACKET_LEN @@ -79,13 +80,13 @@ nouveau_screen_transfer_flags(unsigned pipe) return flags; } -extern struct pipe_screen * +extern struct nouveau_screen * nv30_screen_create(struct nouveau_device *); -extern struct pipe_screen * +extern struct nouveau_screen * nv50_screen_create(struct nouveau_device *); -extern struct pipe_screen * +extern struct nouveau_screen * nvc0_screen_create(struct nouveau_device *); #endif diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index 154c3d3..854f70c 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -413,23 +413,20 @@ nv30_screen_destroy(struct pipe_screen *pscreen) #define FAIL_SCREEN_INIT(str, err) \ do { \ NOUVEAU_ERR(str, err); \ - nv30_screen_destroy(pscreen); \ - return NULL; \ + screen->base.base.context_create = NULL; \ + return &screen->base; \ } while(0) -struct pipe_screen * +struct nouveau_screen * nv30_screen_create(struct nouveau_device *dev) { - struct nv30_screen *screen = CALLOC_STRUCT(nv30_screen); + struct nv30_screen *screen; struct pipe_screen *pscreen; struct nouveau_pushbuf *push; struct nv04_fifo *fifo; unsigned oclass = 0; int ret, i; - if (!screen) - return NULL; - switch (dev->chipset & 0xf0) { case 0x30: if (RANKINE_0397_CHIPSET & (1 << (dev->chipset & 0x0f))) @@ -458,10 +455,16 @@ nv30_screen_create(struct nouveau_device *dev) if (!oclass) { NOUVEAU_ERR("unknown 3d class for 0x%02x\n", dev->chipset); - FREE(screen); return NULL; } + screen = CALLOC_STRUCT(nv30_screen); + if (!screen) + return NULL; + + pscreen = &screen->base.base; + pscreen->destroy = nv30_screen_destroy; + /* * Some modern apps try to use msaa without keeping in mind the * restrictions on videomem of older cards. Resulting in dmesg saying: @@ -479,8 +482,6 @@ nv30_screen_create(struct nouveau_device *dev) if (screen->max_sample_count > 4) screen->max_sample_count = 4; - pscreen = &screen->base.base; - pscreen->destroy = nv30_screen_destroy; pscreen->get_param = nv30_screen_get_param; pscreen->get_paramf = nv30_screen_get_paramf; pscreen->get_shader_param = nv30_screen_get_shader_param; @@ -693,5 +694,5 @@ nv30_screen_create(struct nouveau_device *dev) nouveau_pushbuf_kick(push, push->channel); nouveau_fence_new(&screen->base, &screen->base.fence.current, false); - return pscreen; + return &screen->base; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c index 812d10c..7450119 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c @@ -336,9 +336,10 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *templ) { struct nouveau_device *dev = nouveau_screen(pscreen)->device; + struct nouveau_drm *drm = nouveau_screen(pscreen)->drm; struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); struct pipe_resource *pt = &mt->base.base; - bool compressed = dev->drm_version >= 0x01000101; + bool compressed = drm->version >= 0x01000101; int ret; union nouveau_bo_config bo_config; uint32_t bo_flags; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c index b6ebbbf..cccd3b7 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c @@ -113,6 +113,12 @@ static void nv50_hw_destroy_query(struct nv50_context *nv50, struct nv50_query *q) { struct nv50_hw_query *hq = nv50_hw_query(q); + + if (hq->funcs && hq->funcs->destroy_query) { + hq->funcs->destroy_query(nv50, hq); + return; + } + nv50_hw_query_allocate(nv50, q, 0); nouveau_fence_ref(NULL, &hq->fence); FREE(hq); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c index d1bccb9..4a605f2 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c @@ -71,7 +71,8 @@ nv50_hw_metric_destroy_query(struct nv50_context *nv50, unsigned i; for (i = 0; i < hmq->num_queries; i++) - hmq->queries[i]->funcs->destroy_query(nv50, hmq->queries[i]); + if (hmq->queries[i]->funcs->destroy_query) + hmq->queries[i]->funcs->destroy_query(nv50, hmq->queries[i]); FREE(hmq); } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c index 8453ce7..79c7023 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c @@ -153,7 +153,9 @@ static void nv50_hw_sm_destroy_query(struct nv50_context *nv50, struct nv50_hw_query *hq) { struct nv50_query *q = &hq->base; - q->funcs->destroy_query(nv50, q); + nv50_hw_query_allocate(nv50, q, 0); + nouveau_fence_ref(NULL, &hq->fence); + FREE(hq); } static boolean diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 1e4b75f..272e1d4 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -405,6 +405,11 @@ nv50_screen_destroy(struct pipe_screen *pscreen) if (screen->blitter) nv50_blitter_destroy(screen); + if (screen->pm.prog) { + screen->pm.prog->code = NULL; /* hardcoded, don't FREE */ + nv50_program_destroy(NULL, screen->pm.prog); + FREE(screen->pm.prog); + } nouveau_bo_ref(NULL, &screen->code); nouveau_bo_ref(NULL, &screen->tls_bo); @@ -518,11 +523,11 @@ nv50_screen_init_hwctx(struct nv50_screen *screen) } BEGIN_NV04(push, NV50_3D(ZETA_COMP_ENABLE), 1); - PUSH_DATA(push, screen->base.device->drm_version >= 0x01000101); + PUSH_DATA(push, screen->base.drm->version >= 0x01000101); BEGIN_NV04(push, NV50_3D(RT_COMP_ENABLE(0)), 8); for (i = 0; i < 8; ++i) - PUSH_DATA(push, screen->base.device->drm_version >= 0x01000101); + PUSH_DATA(push, screen->base.drm->version >= 0x01000101); BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1); PUSH_DATA (push, 1); @@ -747,7 +752,7 @@ int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space) return 1; } -struct pipe_screen * +struct nouveau_screen * nv50_screen_create(struct nouveau_device *dev) { struct nv50_screen *screen; @@ -762,6 +767,7 @@ nv50_screen_create(struct nouveau_device *dev) if (!screen) return NULL; pscreen = &screen->base.base; + pscreen->destroy = nv50_screen_destroy; ret = nouveau_screen_init(&screen->base, dev); if (ret) { @@ -782,7 +788,6 @@ nv50_screen_create(struct nouveau_device *dev) chan = screen->base.channel; - pscreen->destroy = nv50_screen_destroy; pscreen->context_create = nv50_create; pscreen->is_format_supported = nv50_screen_is_format_supported; pscreen->get_param = nv50_screen_get_param; @@ -961,11 +966,11 @@ nv50_screen_create(struct nouveau_device *dev) nouveau_fence_new(&screen->base, &screen->base.fence.current, false); - return pscreen; + return &screen->base; fail: - nv50_screen_destroy(pscreen); - return NULL; + screen->base.base.context_create = NULL; + return &screen->base; } int diff --git a/src/gallium/drivers/nouveau/nv50/nv50_tex.c b/src/gallium/drivers/nouveau/nv50/nv50_tex.c index 6083ea9..c3f4336 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_tex.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c @@ -192,8 +192,7 @@ nv50_create_texture_view(struct pipe_context *pipe, tic[2] |= NV50_TIC_2_TARGET_BUFFER | NV50_TIC_2_LINEAR; break; default: - NOUVEAU_ERR("invalid texture target: %d\n", mt->base.base.target); - return false; + unreachable("unexpected/invalid texture target"); } tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index 85878d5..7de2f1f 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -91,6 +91,9 @@ nv50_vertex_state_create(struct pipe_context *pipe, } so->element[i].state = nv50_format_table[fmt].vtx; so->need_conversion = true; + pipe_debug_message(&nouveau_context(pipe)->debug, FALLBACK, + "Converting vertex element %d, no hw format %s", + i, util_format_name(ve->src_format)); } so->element[i].state |= i; diff --git a/src/gallium/drivers/nouveau/nv50/nv84_video.c b/src/gallium/drivers/nouveau/nv50/nv84_video.c index 7a4670f..88655db 100644 --- a/src/gallium/drivers/nouveau/nv50/nv84_video.c +++ b/src/gallium/drivers/nouveau/nv50/nv84_video.c @@ -756,8 +756,8 @@ firmware_present(struct pipe_screen *pscreen, enum pipe_video_format codec) int present, ret; if (!FIRMWARE_PRESENT(checked, VP_KERN)) { - nouveau_object_new(screen->channel, 0, 0x7476, NULL, 0, &obj); - if (obj) + ret = nouveau_object_new(screen->channel, 0, 0x7476, NULL, 0, &obj); + if (!ret) screen->firmware_info.profiles_present |= FIRMWARE_VP_KERN; nouveau_object_del(&obj); screen->firmware_info.profiles_checked |= FIRMWARE_VP_KERN; @@ -765,8 +765,8 @@ firmware_present(struct pipe_screen *pscreen, enum pipe_video_format codec) if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) { if (!FIRMWARE_PRESENT(checked, BSP_KERN)) { - nouveau_object_new(screen->channel, 0, 0x74b0, NULL, 0, &obj); - if (obj) + ret = nouveau_object_new(screen->channel, 0, 0x74b0, NULL, 0, &obj); + if (!ret) screen->firmware_info.profiles_present |= FIRMWARE_BSP_KERN; nouveau_object_del(&obj); screen->firmware_info.profiles_checked |= FIRMWARE_BSP_KERN; diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video.c b/src/gallium/drivers/nouveau/nv50/nv98_video.c index 20ea547..177a7e0 100644 --- a/src/gallium/drivers/nouveau/nv50/nv98_video.c +++ b/src/gallium/drivers/nouveau/nv50/nv98_video.c @@ -25,6 +25,8 @@ #include "util/u_sampler.h" #include "util/u_format.h" +#include <nvif/class.h> + static void nv98_decoder_decode_bitstream(struct pipe_video_codec *decoder, struct pipe_video_buffer *video_target, @@ -56,6 +58,28 @@ nv98_decoder_decode_bitstream(struct pipe_video_codec *decoder, nv98_decoder_ppp(dec, desc, target, comm_seq); } +static const struct nouveau_mclass +nv98_decoder_msvld[] = { + { G98_MSVLD, -1 }, + { IGT21A_MSVLD, -1 }, + { GT212_MSVLD, -1 }, + {} +}; + +static const struct nouveau_mclass +nv98_decoder_mspdec[] = { + { G98_MSPDEC, -1 }, + { GT212_MSPDEC, -1 }, + {} +}; + +static const struct nouveau_mclass +nv98_decoder_msppp[] = { + { G98_MSPPP, -1 }, + { GT212_MSPPP, -1 }, + {} +}; + struct pipe_video_codec * nv98_create_decoder(struct pipe_context *context, const struct pipe_video_codec *templ) @@ -103,12 +127,33 @@ nv98_create_decoder(struct pipe_context *context, } push = dec->pushbuf; - if (!ret) - ret = nouveau_object_new(dec->channel[0], 0x390b1, 0x85b1, NULL, 0, &dec->bsp); - if (!ret) - ret = nouveau_object_new(dec->channel[1], 0x190b2, 0x85b2, NULL, 0, &dec->vp); - if (!ret) - ret = nouveau_object_new(dec->channel[2], 0x290b3, 0x85b3, NULL, 0, &dec->ppp); + if (!ret) { + ret = nouveau_object_mclass(dec->channel[0], nv98_decoder_msvld); + if (ret >= 0) { + ret = nouveau_object_new(dec->channel[0], 0xbeef85b1, + nv98_decoder_msvld[ret].oclass, NULL, 0, + &dec->bsp); + } + } + + if (!ret) { + ret = nouveau_object_mclass(dec->channel[1], nv98_decoder_mspdec); + if (ret >= 0) { + ret = nouveau_object_new(dec->channel[1], 0xbeef85b2, + nv98_decoder_mspdec[ret].oclass, NULL, 0, + &dec->vp); + } + } + + if (!ret) { + ret = nouveau_object_mclass(dec->channel[2], nv98_decoder_msppp); + if (ret >= 0) { + ret = nouveau_object_new(dec->channel[2], 0xbeef85b3, + nv98_decoder_msppp[ret].oclass, NULL, 0, + &dec->ppp); + } + } + if (ret) goto fail; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c index 15991c3..ed1ac48 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c @@ -248,9 +248,10 @@ nvc0_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *templ) { struct nouveau_device *dev = nouveau_screen(pscreen)->device; + struct nouveau_drm *drm = nouveau_screen(pscreen)->drm; struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); struct pipe_resource *pt = &mt->base.base; - bool compressed = dev->drm_version >= 0x01000101; + bool compressed = drm->version >= 0x01000101; int ret; union nouveau_bo_config bo_config; uint32_t bo_flags; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c index 3845d61..7497317 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c @@ -184,7 +184,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, count++; #endif - if (screen->base.device->drm_version >= 0x01000101) { + if (screen->base.drm->version >= 0x01000101) { if (screen->compute) { if (screen->base.class_3d == NVE4_3D_CLASS) { count += 2; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c index 90ee82f..a70d524 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c @@ -116,6 +116,12 @@ static void nvc0_hw_destroy_query(struct nvc0_context *nvc0, struct nvc0_query *q) { struct nvc0_hw_query *hq = nvc0_hw_query(q); + + if (hq->funcs && hq->funcs->destroy_query) { + hq->funcs->destroy_query(nvc0, hq); + return; + } + nvc0_hw_query_allocate(nvc0, q, 0); nouveau_fence_ref(NULL, &hq->fence); FREE(hq); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c index 12fb609..7a64b69 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c @@ -293,7 +293,8 @@ nvc0_hw_metric_destroy_query(struct nvc0_context *nvc0, unsigned i; for (i = 0; i < hmq->num_queries; i++) - hmq->queries[i]->funcs->destroy_query(nvc0, hmq->queries[i]); + if (hmq->queries[i]->funcs->destroy_query) + hmq->queries[i]->funcs->destroy_query(nvc0, hmq->queries[i]); FREE(hmq); } @@ -420,7 +421,10 @@ sm30_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8]) { switch (hq->base.type - NVE4_HW_METRIC_QUERY(0)) { case NVE4_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY: - return sm20_hw_metric_calc_result(hq, res64); + /* (active_warps / active_cycles) / max. number of warps on a MP */ + if (res64[1]) + return (res64[0] / (double)res64[1]) / 64; + break; case NVE4_HW_METRIC_QUERY_BRANCH_EFFICIENCY: return sm20_hw_metric_calc_result(hq, res64); case NVE4_HW_METRIC_QUERY_INST_ISSUED: @@ -561,7 +565,7 @@ nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id, uint16_t class_3d = screen->base.class_3d; int count = 0; - if (screen->base.device->drm_version >= 0x01000101) { + if (screen->base.drm->version >= 0x01000101) { if (screen->compute) { if (screen->base.class_3d == NVE4_3D_CLASS) { count += NVE4_HW_METRIC_QUERY_COUNT; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c index 7d1e75f..721857e 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c @@ -782,7 +782,9 @@ static void nvc0_hw_sm_destroy_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) { struct nvc0_query *q = &hq->base; - q->funcs->destroy_query(nvc0, q); + nvc0_hw_query_allocate(nvc0, q, 0); + nouveau_fence_ref(NULL, &hq->fence); + FREE(hq); } static boolean @@ -1075,17 +1077,6 @@ nve4_hw_sm_query_read_data(uint32_t count[32][8], return true; } -/* Metric calculations: - * sum(x) ... sum of x over all MPs - * avg(x) ... average of x over all MPs - * - * IPC : sum(inst_executed) / clock - * INST_REPLAY_OHEAD: (sum(inst_issued) - sum(inst_executed)) / sum(inst_issued) - * MP_OCCUPANCY : avg((active_warps / 64) / active_cycles) - * MP_EFFICIENCY : avg(active_cycles / clock) - * - * NOTE: Interpretation of IPC requires knowledge of MP count. - */ static boolean nvc0_hw_sm_get_query_result(struct nvc0_context *nvc0, struct nvc0_hw_query *hq, boolean wait, union pipe_query_result *result) @@ -1130,7 +1121,7 @@ nvc0_hw_sm_create_query(struct nvc0_context *nvc0, unsigned type) struct nvc0_hw_query *hq; unsigned space; - if (nvc0->screen->base.device->drm_version < 0x01000101) + if (nvc0->screen->base.drm->version < 0x01000101) return NULL; if ((type < NVE4_HW_SM_QUERY(0) || type > NVE4_HW_SM_QUERY_LAST) && @@ -1225,7 +1216,7 @@ nvc0_hw_sm_get_driver_query_info(struct nvc0_screen *screen, unsigned id, { int count = 0; - if (screen->base.device->drm_version >= 0x01000101) { + if (screen->base.drm->version >= 0x01000101) { if (screen->compute) { if (screen->base.class_3d == NVE4_3D_CLASS) { count += NVE4_HW_SM_QUERY_COUNT; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 461fcaa..3995446 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -22,6 +22,7 @@ #include <xf86drm.h> #include <nouveau_drm.h> +#include <nvif/class.h> #include "util/u_format.h" #include "util/u_format_s3tc.h" #include "pipe/p_screen.h" @@ -428,6 +429,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen) if (screen->pm.prog) { screen->pm.prog->code = NULL; /* hardcoded, don't FREE */ nvc0_program_destroy(NULL, screen->pm.prog); + FREE(screen->pm.prog); } nouveau_bo_ref(NULL, &screen->text); @@ -617,11 +619,10 @@ nvc0_screen_resize_tls_area(struct nvc0_screen *screen, #define FAIL_SCREEN_INIT(str, err) \ do { \ NOUVEAU_ERR(str, err); \ - nvc0_screen_destroy(pscreen); \ - return NULL; \ + goto fail; \ } while(0) -struct pipe_screen * +struct nouveau_screen * nvc0_screen_create(struct nouveau_device *dev) { struct nvc0_screen *screen; @@ -650,6 +651,7 @@ nvc0_screen_create(struct nouveau_device *dev) if (!screen) return NULL; pscreen = &screen->base.base; + pscreen->destroy = nvc0_screen_destroy; ret = nouveau_screen_init(&screen->base, dev); if (ret) { @@ -672,7 +674,6 @@ nvc0_screen_create(struct nouveau_device *dev) screen->base.vidmem_bindings = 0; } - pscreen->destroy = nvc0_screen_destroy; pscreen->context_create = nvc0_create; pscreen->is_format_supported = nvc0_screen_is_format_supported; pscreen->get_param = nvc0_screen_get_param; @@ -687,7 +688,7 @@ nvc0_screen_create(struct nouveau_device *dev) screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported; flags = NOUVEAU_BO_GART | NOUVEAU_BO_MAP; - if (dev->drm_version >= 0x01000202) + if (screen->base.drm->version >= 0x01000202) flags |= NOUVEAU_BO_COHERENT; ret = nouveau_bo_new(dev, flags, 0, 4096, NULL, &screen->fence.bo); @@ -699,12 +700,13 @@ nvc0_screen_create(struct nouveau_device *dev) screen->base.fence.update = nvc0_screen_fence_update; - ret = nouveau_object_new(chan, - (dev->chipset < 0xe0) ? 0x1f906e : 0x906e, 0x906e, - NULL, 0, &screen->nvsw); + ret = nouveau_object_new(chan, (dev->chipset < 0xe0) ? 0x1f906e : 0x906e, + NVIF_CLASS_SW_GF100, NULL, 0, &screen->nvsw); if (ret) FAIL_SCREEN_INIT("Error creating SW object: %d\n", ret); + BEGIN_NVC0(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->nvsw->handle); switch (dev->chipset & ~0xf) { case 0x110: @@ -811,10 +813,11 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATA (push, 0x17); } - IMMED_NVC0(push, NVC0_3D(ZETA_COMP_ENABLE), dev->drm_version >= 0x01000101); + IMMED_NVC0(push, NVC0_3D(ZETA_COMP_ENABLE), + screen->base.drm->version >= 0x01000101); BEGIN_NVC0(push, NVC0_3D(RT_COMP_ENABLE(0)), 8); for (i = 0; i < 8; ++i) - PUSH_DATA(push, dev->drm_version >= 0x01000101); + PUSH_DATA(push, screen->base.drm->version >= 0x01000101); BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1); PUSH_DATA (push, 1); @@ -910,7 +913,7 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9)); PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9)); - if (dev->drm_version >= 0x01000101) { + if (screen->base.drm->version >= 0x01000101) { ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value); if (ret) { NOUVEAU_ERR("NOUVEAU_GETPARAM_GRAPH_UNITS failed.\n"); @@ -1061,11 +1064,11 @@ nvc0_screen_create(struct nouveau_device *dev) nouveau_fence_new(&screen->base, &screen->base.fence.current, false); - return pscreen; + return &screen->base; fail: - nvc0_screen_destroy(pscreen); - return NULL; + screen->base.base.context_create = NULL; + return &screen->base; } int diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c index 7e2e999..5e84ca9 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -236,11 +236,8 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0) struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_program *gp = nvc0->gmtyprog; - if (gp) - nvc0_program_validate(nvc0, gp); - /* we allow GPs with no code for specifying stream output state only */ - if (gp && gp->code_size) { + if (gp && nvc0_program_validate(nvc0, gp) && gp->code_size) { const bool gp_selects_layer = !!(gp->hdr[13] & (1 << 9)); BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index f8e1efb..4e43c4e 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -1030,9 +1030,11 @@ nvc0_blitctx_post_blit(struct nvc0_blitctx *blit) nvc0->base.pipe.render_condition(&nvc0->base.pipe, nvc0->cond_query, nvc0->cond_cond, nvc0->cond_mode); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP); nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 0)); nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 1)); + nouveau_scratch_done(&nvc0->base); nvc0->dirty = blit->saved.dirty | (NVC0_NEW_FRAMEBUFFER | NVC0_NEW_SCISSOR | NVC0_NEW_SAMPLE_MASK | diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c index 2dd100f..74090ce 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c @@ -193,9 +193,7 @@ nvc0_create_texture_view(struct pipe_context *pipe, tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY; break; default: - NOUVEAU_ERR("unexpected/invalid texture target: %d\n", - mt->base.base.target); - return false; + unreachable("unexpected/invalid texture target"); } tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c index c464904..54443bd 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c @@ -95,6 +95,9 @@ nvc0_vertex_state_create(struct pipe_context *pipe, } so->element[i].state = nvc0_format_table[fmt].vtx; so->need_conversion = true; + pipe_debug_message(&nouveau_context(pipe)->debug, FALLBACK, + "Converting vertex element %d, no hw format %s", + i, util_format_name(ve->src_format)); } size = util_format_get_blocksize(fmt); |