diff options
author | Eric Anholt <eric@anholt.net> | 2015-08-05 20:05:56 -0700 |
---|---|---|
committer | Eric Anholt <eric@anholt.net> | 2015-10-26 16:48:34 -0700 |
commit | 3359ad6cda49fb977d837eb00e8ae4d781d95c2a (patch) | |
tree | cac61e2a26a4edfce06bafa7f48e5b58a3abbc1d /src/gallium/drivers/vc4/vc4_qpu_emit.c | |
parent | 01ca4f207efac555ff5f729dce1687a68ba65400 (diff) | |
download | external_mesa3d-3359ad6cda49fb977d837eb00e8ae4d781d95c2a.zip external_mesa3d-3359ad6cda49fb977d837eb00e8ae4d781d95c2a.tar.gz external_mesa3d-3359ad6cda49fb977d837eb00e8ae4d781d95c2a.tar.bz2 |
vc4: Add support for copy propagation with unpack flags present.
total instructions in shared programs: 89251 -> 87862 (-1.56%)
instructions in affected programs: 52971 -> 51582 (-2.62%)
Diffstat (limited to 'src/gallium/drivers/vc4/vc4_qpu_emit.c')
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qpu_emit.c | 61 |
1 files changed, 43 insertions, 18 deletions
```diff
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index d06f8b2..133e138 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -101,7 +101,8 @@ swap_file(struct qpu_reg *src)
 static void
 fixup_raddr_conflict(struct vc4_compile *c,
                      struct qpu_reg dst,
-                     struct qpu_reg *src0, struct qpu_reg *src1)
+                     struct qpu_reg *src0, struct qpu_reg *src1,
+                     struct qinst *inst, uint64_t *unpack)
 {
         uint32_t mux0 = src0->mux == QPU_MUX_SMALL_IMM ? QPU_MUX_B : src0->mux;
         uint32_t mux1 = src1->mux == QPU_MUX_SMALL_IMM ? QPU_MUX_B : src1->mux;
@@ -117,7 +118,21 @@ fixup_raddr_conflict(struct vc4_compile *c,
                 return;
 
         if (mux0 == QPU_MUX_A) {
-                queue(c, qpu_a_MOV(qpu_rb(31), *src0));
+                /* Make sure we use the same type of MOV as the instruction,
+                 * in case of unpacks.
+                 */
+                if (qir_is_float_input(inst))
+                        queue(c, qpu_a_FMAX(qpu_rb(31), *src0, *src0));
+                else
+                        queue(c, qpu_a_MOV(qpu_rb(31), *src0));
+
+                /* If we had an unpack on this A-file source, we need to put
+                 * it into this MOV, not into the later move from regfile B.
+                 */
+                if (inst->src[0].pack) {
+                        *last_inst(c) |= *unpack;
+                        *unpack = 0;
+                }
                 *src0 = qpu_rb(31);
         } else {
                 queue(c, qpu_a_MOV(qpu_ra(31), *src0));
@@ -296,7 +311,7 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                 case QOP_SEL_X_0_ZC:
                 case QOP_SEL_X_0_NS:
                 case QOP_SEL_X_0_NC:
-                        queue(c, qpu_a_MOV(dst, src[0]));
+                        queue(c, qpu_a_MOV(dst, src[0]) | unpack);
                         set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS +
                                           QPU_COND_ZS);
 
@@ -310,10 +325,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                 case QOP_SEL_X_Y_NS:
                 case QOP_SEL_X_Y_NC:
                         queue(c, qpu_a_MOV(dst, src[0]));
+                        if (qinst->src[0].pack)
+                                *(last_inst(c)) |= unpack;
                         set_last_cond_add(c, qinst->op - QOP_SEL_X_Y_ZS +
                                           QPU_COND_ZS);
 
                         queue(c, qpu_a_MOV(dst, src[1]));
+                        if (qinst->src[1].pack)
+                                *(last_inst(c)) |= unpack;
                         set_last_cond_add(c, ((qinst->op - QOP_SEL_X_Y_ZS) ^
                                               1) + QPU_COND_ZS);
 
@@ -326,19 +345,19 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                         switch (qinst->op) {
                         case QOP_RCP:
                                 queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_RECIP),
-                                                   src[0]));
+                                                   src[0]) | unpack);
                                 break;
                         case QOP_RSQ:
                                 queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_RECIPSQRT),
-                                                   src[0]));
+                                                   src[0]) | unpack);
                                 break;
                         case QOP_EXP2:
                                 queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_EXP),
-                                                   src[0]));
+                                                   src[0]) | unpack);
                                 break;
                         case QOP_LOG2:
                                 queue(c, qpu_a_MOV(qpu_rb(QPU_W_SFU_LOG),
-                                                   src[0]));
+                                                   src[0]) | unpack);
                                 break;
                         default:
                                 abort();
@@ -373,16 +392,19 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
 
                 case QOP_TLB_DISCARD_SETUP:
                         discard = true;
-                        queue(c, qpu_a_MOV(src[0], src[0]));
+                        queue(c, qpu_a_MOV(src[0], src[0]) | unpack);
                         *last_inst(c) |= QPU_SF;
                         break;
 
                 case QOP_TLB_STENCIL_SETUP:
-                        queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP), src[0]));
+                        assert(!unpack);
+                        queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP),
+                                           src[0]) | unpack);
                         break;
 
                 case QOP_TLB_Z_WRITE:
-                        queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z), src[0]));
+                        queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z),
+                                           src[0]) | unpack);
                         if (discard) {
                                 set_last_cond_add(c, QPU_COND_ZS);
                         }
@@ -398,14 +420,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                         break;
 
                 case QOP_TLB_COLOR_WRITE:
-                        queue(c, qpu_a_MOV(qpu_tlbc(), src[0]));
+                        queue(c, qpu_a_MOV(qpu_tlbc(), src[0]) | unpack);
                         if (discard) {
                                 set_last_cond_add(c, QPU_COND_ZS);
                         }
                         break;
 
                 case QOP_VARY_ADD_C:
-                        queue(c, qpu_a_FADD(dst, src[0], qpu_r5()));
+                        queue(c, qpu_a_FADD(dst, src[0], qpu_r5()) | unpack);
                         break;
 
                 case QOP_TEX_S:
@@ -414,12 +436,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                 case QOP_TEX_B:
                         queue(c, qpu_a_MOV(qpu_rb(QPU_W_TMU0_S +
                                                   (qinst->op - QOP_TEX_S)),
-                                           src[0]));
+                                           src[0]) | unpack);
                         break;
 
                 case QOP_TEX_DIRECT:
-                        fixup_raddr_conflict(c, dst, &src[0], &src[1]);
-                        queue(c, qpu_a_ADD(qpu_rb(QPU_W_TMU0_S), src[0], src[1]));
+                        fixup_raddr_conflict(c, dst, &src[0], &src[1],
+                                             qinst, &unpack);
+                        queue(c, qpu_a_ADD(qpu_rb(QPU_W_TMU0_S),
+                                           src[0], src[1]) | unpack);
                         break;
 
                 case QOP_TEX_RESULT:
@@ -447,16 +471,17 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                         if (qir_get_op_nsrc(qinst->op) == 1)
                                 src[1] = src[0];
 
-                        fixup_raddr_conflict(c, dst, &src[0], &src[1]);
+                        fixup_raddr_conflict(c, dst, &src[0], &src[1],
+                                             qinst, &unpack);
 
                         if (qir_is_mul(qinst)) {
                                 queue(c, qpu_m_alu2(translate[qinst->op].op,
                                                     dst,
-                                                    src[0], src[1]));
+                                                    src[0], src[1]) | unpack);
                         } else {
                                 queue(c, qpu_a_alu2(translate[qinst->op].op,
                                                     dst,
-                                                    src[0], src[1]));
+                                                    src[0], src[1]) | unpack);
                         }
                         set_last_dst_pack(c, qinst);
 
```