From b4b20d42f6a8cd5aec3ba529a0b8d6ea22e73305 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Fri, 25 Apr 2014 22:40:42 -0400 Subject: nvc0/ir: add support for new bitfield manipulation opcodes This adds support for IBFE, UBFE, BFI, LSB, IMSB, UMSB, BREV and POPC, all of which are required for ARB_gpu_shader5 (gs5) support. Signed-off-by: Ilia Mirkin --- src/gallium/drivers/nouveau/codegen/nv50_ir.h | 2 + .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 35 ++++++++++++ .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 17 ++++++ .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 64 +++++++++++++++++++++- .../drivers/nouveau/codegen/nv50_ir_print.cpp | 1 + .../drivers/nouveau/codegen/nv50_ir_target.cpp | 5 +- .../nouveau/codegen/nv50_ir_target_nvc0.cpp | 7 ++- 7 files changed, 127 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index c57729e..919d3a4 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -143,6 +143,7 @@ enum operation OP_POPCNT, // bitcount(src0 & src1) OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7] OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK + OP_BFIND, // find highest/lowest set bit OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order) OP_ATOM, OP_BAR, // execution barrier, sources = { id, thread count, predicate } @@ -171,6 +172,7 @@ enum operation #define NV50_IR_SUBOP_TEXBAR(n) n #define NV50_IR_SUBOP_MOV_FINAL 1 #define NV50_IR_SUBOP_EXTBF_REV 1 +#define NV50_IR_SUBOP_BFIND_SAMT 1 #define NV50_IR_SUBOP_PERMT_F4E 1 #define NV50_IR_SUBOP_PERMT_B4E 2 #define NV50_IR_SUBOP_PERMT_RC8 3 diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index c258b6b..63d5525 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -94,6 +94,8 @@ private: void emitLogicOp(const Instruction *, uint8_t subOp); void emitPOPC(const Instruction *); void emitINSBF(const Instruction *); + void emitEXTBF(const Instruction *); + void emitBFIND(const Instruction *); void emitShift(const Instruction *); void emitSFnOp(const Instruction *, uint8_t subOp); @@ -696,6 +698,30 @@ CodeEmitterGK110::emitINSBF(const Instruction *i) } void +CodeEmitterGK110::emitEXTBF(const Instruction *i) +{ + emitForm_21(i, 0x600, 0xc00); + + if (i->dType == TYPE_S32) + code[1] |= 0x80000; + if (i->subOp == NV50_IR_SUBOP_EXTBF_REV) + code[1] |= 0x800; +} + +void +CodeEmitterGK110::emitBFIND(const Instruction *i) +{ + emitForm_21(i, 0x618, 0xc18); + + if (i->dType == TYPE_S32) + code[1] |= 0x80000; + if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) + code[1] |= 0x800; + if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT) + code[1] |= 0x1000; +} + +void CodeEmitterGK110::emitShift(const Instruction *i) { if (i->op == OP_SHR) { @@ -1725,6 +1751,15 @@ CodeEmitterGK110::emitInstruction(Instruction *insn) case OP_POPCNT: emitPOPC(insn); break; + case OP_INSBF: + emitINSBF(insn); + break; + case OP_EXTBF: + emitEXTBF(insn); + break; + case OP_BFIND: + emitBFIND(insn); + break; case OP_JOIN: emitNOP(insn); insn->join = 1; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index cef92cf..11a7c2b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -104,6 +104,7 @@ private: void emitPOPC(const Instruction *); void emitINSBF(const Instruction *); void emitEXTBF(const Instruction *); + void emitBFIND(const Instruction *); void emitPERMT(const Instruction *); void emitShift(const Instruction *); @@ -804,6 +805,19 @@ CodeEmitterNVC0::emitEXTBF(const Instruction *i) } void +CodeEmitterNVC0::emitBFIND(const Instruction *i) +{ + 
emitForm_B(i, HEX64(78000000, 00000003)); + + if (i->dType == TYPE_S32) + code[0] |= 1 << 5; + if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) + code[0] |= 1 << 8; + if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT) + code[0] |= 1 << 6; +} + +void CodeEmitterNVC0::emitPERMT(const Instruction *i) { emitForm_A(i, HEX64(24000000, 00000004)); @@ -2382,6 +2396,9 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn) case OP_EXTBF: emitEXTBF(insn); break; + case OP_BFIND: + emitBFIND(insn); + break; case OP_PERMT: emitPERMT(insn); break; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index fc418bf..05a79a3 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -418,6 +418,8 @@ nv50_ir::DataType Instruction::inferSrcType() const case TGSI_OPCODE_ATOMXOR: case TGSI_OPCODE_ATOMUMIN: case TGSI_OPCODE_ATOMUMAX: + case TGSI_OPCODE_UBFE: + case TGSI_OPCODE_UMSB: return nv50_ir::TYPE_U32; case TGSI_OPCODE_I2F: case TGSI_OPCODE_IDIV: @@ -434,6 +436,8 @@ nv50_ir::DataType Instruction::inferSrcType() const case TGSI_OPCODE_UARL: case TGSI_OPCODE_ATOMIMIN: case TGSI_OPCODE_ATOMIMAX: + case TGSI_OPCODE_IBFE: + case TGSI_OPCODE_IMSB: return nv50_ir::TYPE_S32; default: return nv50_ir::TYPE_F32; @@ -625,6 +629,15 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(TXB2, TXB); NV50_IR_OPCODE_CASE(TXL2, TXL); + NV50_IR_OPCODE_CASE(IBFE, EXTBF); + NV50_IR_OPCODE_CASE(UBFE, EXTBF); + NV50_IR_OPCODE_CASE(BFI, INSBF); + NV50_IR_OPCODE_CASE(BREV, EXTBF); + NV50_IR_OPCODE_CASE(POPC, POPCNT); + NV50_IR_OPCODE_CASE(LSB, BFIND); + NV50_IR_OPCODE_CASE(IMSB, BFIND); + NV50_IR_OPCODE_CASE(UMSB, BFIND); + NV50_IR_OPCODE_CASE(END, EXIT); default: @@ -2137,7 +2150,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) Instruction *geni; Value *dst0[4], *rDst0[4]; - Value *src0, *src1, *src2; + Value 
*src0, *src1, *src2, *src3; Value *val0, *val1; int c; @@ -2688,6 +2701,55 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_ATOMIMAX: handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode())); break; + case TGSI_OPCODE_IBFE: + case TGSI_OPCODE_UBFE: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); + src1 = fetchSrc(1, c); + src2 = fetchSrc(2, c); + mkOp3(OP_INSBF, TYPE_U32, src1, src2, mkImm(0x808), src1); + mkOp2(OP_EXTBF, dstTy, dst0[c], src0, src1); + } + break; + case TGSI_OPCODE_BFI: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); + src1 = fetchSrc(1, c); + src2 = fetchSrc(2, c); + src3 = fetchSrc(3, c); + mkOp3(OP_INSBF, TYPE_U32, src2, src3, mkImm(0x808), src2); + mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, src2, src0); + } + break; + case TGSI_OPCODE_LSB: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); + geni = mkOp2(OP_EXTBF, TYPE_U32, src0, src0, mkImm(0x2000)); + geni->subOp = NV50_IR_SUBOP_EXTBF_REV; + geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], src0); + geni->subOp = NV50_IR_SUBOP_BFIND_SAMT; + } + break; + case TGSI_OPCODE_IMSB: + case TGSI_OPCODE_UMSB: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); + mkOp1(OP_BFIND, srcTy, dst0[c], src0); + } + break; + case TGSI_OPCODE_BREV: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); + geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000)); + geni->subOp = NV50_IR_SUBOP_EXTBF_REV; + } + break; + case TGSI_OPCODE_POPC: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); + mkOp2(OP_POPCNT, TYPE_U32, dst0[c], src0, src0); + } + break; default: ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode()); assert(0); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index 42013e5..f788c72 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -173,6 +173,7 @@ const char *operationStr[OP_LAST + 1] = "popcnt", "insbf", "extbf", + "bfind", "permt", "atom", "bar", diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index f479cf4..4ca5687 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -50,7 +50,7 @@ const uint8_t Target::operationSrcNr[] = 0, // TEXBAR 1, 1, // DFDX, DFDY 1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP - 2, 3, 2, 3, // POPCNT, INSBF, EXTBF, PERMT + 2, 3, 2, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, PERMT 2, 2, // ATOM, BAR 2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET, 2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL @@ -115,8 +115,9 @@ const OpClass Target::operationClass[] = // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL, - // POPCNT, INSBF, EXTBF, PERMT + // POPCNT, INSBF, EXTBF, BFIND; PERMT OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, + OPCLASS_BITFIELD, // ATOM, BAR OPCLASS_ATOMIC, OPCLASS_CONTROL, // VADD, VAVG, VMIN, VMAX diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp index c747f3e..395d5b5 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp @@ -124,7 +124,10 @@ static const struct opProperties _initProps[] = { OP_DFDX, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 }, { OP_DFDY, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 }, { OP_CALL, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 }, - { OP_INSBF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4 }, + { OP_POPCNT, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 }, + { OP_INSBF, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 }, + { OP_EXTBF, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 }, + { OP_BFIND, 
0x0, 0x0, 0x1, 0x0, 0x1, 0x1 }, { OP_PERMT, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 }, { OP_SET_AND, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 }, { OP_SET_OR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 }, @@ -393,6 +396,8 @@ TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const case OP_AND: case OP_OR: case OP_XOR: + case OP_POPCNT: + case OP_BFIND: break; case OP_SET: if (insn->sType != TYPE_F32) -- cgit v1.1