summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/vc4/vc4_qpu_emit.c
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2015-12-22 13:37:36 -0800
committerEric Anholt <eric@anholt.net>2016-01-06 12:39:51 -0800
commit71db7d3dc577e48da3689fd66989ec3b0a069089 (patch)
tree450fb776d1617cbdec4160d81a87480599c7dbb1 /src/gallium/drivers/vc4/vc4_qpu_emit.c
parent0a89f307f95de3a3357d834f36c60fe803895f8a (diff)
downloadexternal_mesa3d-71db7d3dc577e48da3689fd66989ec3b0a069089.zip
external_mesa3d-71db7d3dc577e48da3689fd66989ec3b0a069089.tar.gz
external_mesa3d-71db7d3dc577e48da3689fd66989ec3b0a069089.tar.bz2
vc4: Replace the SSA-style SEL operators with conditional MOVs.
I'm moving away from QIR being SSA (since NIR is doing lots of SSA optimization for us now) and instead having QIR just be QPU operations with virtual registers. By making our SELs be composed of two MOVs, we could potentially coalesce the registers for the MOV's src and dst and eliminate the MOV. total instructions in shared programs: 88448 -> 88028 (-0.47%) instructions in affected programs: 39845 -> 39425 (-1.05%) total estimated cycles in shared programs: 246306 -> 245762 (-0.22%) estimated cycles in affected programs: 162887 -> 162343 (-0.33%)
Diffstat (limited to 'src/gallium/drivers/vc4/vc4_qpu_emit.c')
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_emit.c49
1 files changed, 14 insertions, 35 deletions
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index cb4e0cf..b06702af 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -64,6 +64,12 @@ set_last_cond_add(struct vc4_compile *c, uint32_t cond)
*last_inst(c) = qpu_set_cond_add(*last_inst(c), cond);
}
+static void
+set_last_cond_mul(struct vc4_compile *c, uint32_t cond)
+{
+ *last_inst(c) = qpu_set_cond_mul(*last_inst(c), cond);
+}
+
/**
* Some special registers can be read from either file, which lets us resolve
* raddr conflicts without extra MOVs.
@@ -306,42 +312,9 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
break;
}
- switch (qinst->op) {
- case QOP_SEL_X_0_ZS:
- case QOP_SEL_X_0_ZC:
- case QOP_SEL_X_0_NS:
- case QOP_SEL_X_0_NC:
- case QOP_SEL_X_0_CS:
- case QOP_SEL_X_0_CC:
- queue(c, qpu_a_MOV(dst, src[0]) | unpack);
- set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS +
- QPU_COND_ZS);
-
- queue(c, qpu_a_XOR(dst, qpu_r0(), qpu_r0()));
- set_last_cond_add(c, ((qinst->op - QOP_SEL_X_0_ZS) ^
- 1) + QPU_COND_ZS);
- break;
-
- case QOP_SEL_X_Y_ZS:
- case QOP_SEL_X_Y_ZC:
- case QOP_SEL_X_Y_NS:
- case QOP_SEL_X_Y_NC:
- case QOP_SEL_X_Y_CS:
- case QOP_SEL_X_Y_CC:
- queue(c, qpu_a_MOV(dst, src[0]));
- if (qinst->src[0].pack)
- *(last_inst(c)) |= unpack;
- set_last_cond_add(c, qinst->op - QOP_SEL_X_Y_ZS +
- QPU_COND_ZS);
-
- queue(c, qpu_a_MOV(dst, src[1]));
- if (qinst->src[1].pack)
- *(last_inst(c)) |= unpack;
- set_last_cond_add(c, ((qinst->op - QOP_SEL_X_Y_ZS) ^
- 1) + QPU_COND_ZS);
-
- break;
+ bool handled_qinst_cond = true;
+ switch (qinst->op) {
case QOP_RCP:
case QOP_RSQ:
case QOP_EXP2:
@@ -497,16 +470,22 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
queue(c, qpu_m_alu2(translate[qinst->op].op,
dst,
src[0], src[1]) | unpack);
+ set_last_cond_mul(c, qinst->cond);
} else {
queue(c, qpu_a_alu2(translate[qinst->op].op,
dst,
src[0], src[1]) | unpack);
+ set_last_cond_add(c, qinst->cond);
}
+ handled_qinst_cond = true;
set_last_dst_pack(c, qinst);
break;
}
+ assert(qinst->cond == QPU_COND_ALWAYS ||
+ handled_qinst_cond);
+
if (qinst->sf) {
assert(!qir_is_multi_instruction(qinst));
*last_inst(c) |= QPU_SF;